Merge tag 'drm-intel-fixes-2017-12-22-1' of git://anongit.freedesktop.org/drm/drm...
author Dave Airlie <airlied@redhat.com>
Wed, 27 Dec 2017 19:20:07 +0000 (05:20 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 27 Dec 2017 19:20:07 +0000 (05:20 +1000)
A GLK pipe C related fix and a GVT fix.

* tag 'drm-intel-fixes-2017-12-22-1' of git://anongit.freedesktop.org/drm/drm-intel:
  i915: Reject CCS modifiers for pipe C on Geminilake
  drm/i915/gvt: Fix pipe A enable as default for vgpu

601 files changed:
Documentation/arm64/silicon-errata.txt
Documentation/cgroup-v2.txt
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
Documentation/filesystems/overlayfs.txt
Documentation/locking/crossrelease.txt [deleted file]
Documentation/vm/zswap.txt
Documentation/x86/x86_64/mm.txt
MAINTAINERS
Makefile
arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
arch/arm/lib/csumpartialcopyuser.S
arch/arm64/Kconfig
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/cpu-reset.S
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/efi-entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/head.S
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/relocate_kernel.S
arch/arm64/kvm/hyp-init.S
arch/arm64/kvm/hyp/debug-sr.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/thread_info.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/hpmc.S
arch/parisc/kernel/unwind.c
arch/parisc/lib/delay.c
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/kernel/process.c
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/imc-pmu.c
arch/riscv/include/asm/barrier.h
arch/riscv/kernel/setup.c
arch/riscv/kernel/sys_riscv.c
arch/s390/include/asm/pgtable.h
arch/s390/kernel/compat_linux.c
arch/s390/net/bpf_jit_comp.c
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/mm/gup.c
arch/sparc/net/bpf_jit_comp_64.c
arch/um/include/asm/Kbuild
arch/um/include/asm/mmu_context.h
arch/um/kernel/trap.c
arch/unicore32/include/asm/mmu_context.h
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/pgtable_64.c [new file with mode: 0644]
arch/x86/boot/genimage.sh
arch/x86/crypto/salsa20_glue.c
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/include/asm/cpu_entry_area.h [new file with mode: 0644]
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/espfix.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/hypervisor.h
arch/x86/include/asm/invpcid.h [new file with mode: 0644]
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pgtable_32_types.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/suspend_32.h
arch/x86/include/asm/suspend_64.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/unwind.h
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/doublefault.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/ldt.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/delay.c
arch/x86/lib/x86-opcode-map.txt
arch/x86/mm/Makefile
arch/x86/mm/cpu_entry_area.c [new file with mode: 0644]
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/ioremap.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/kmmio.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/tlb.c
arch/x86/pci/fixup.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/power/cpu.c
arch/x86/xen/apic.c
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/setup.c
block/bio.c
block/blk-map.c
block/blk-throttle.c
block/bounce.c
block/kyber-iosched.c
crypto/af_alg.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/hmac.c
crypto/mcryptd.c
crypto/rsa_helper.c
crypto/salsa20_generic.c
crypto/shash.c
crypto/skcipher.c
drivers/acpi/apei/erst.c
drivers/acpi/cppc_acpi.c
drivers/acpi/device_pm.c
drivers/acpi/nfit/core.c
drivers/ata/ahci_mtk.c
drivers/ata/ahci_qoriq.c
drivers/ata/libata-core.c
drivers/ata/pata_pdc2027x.c
drivers/base/power/main.c
drivers/block/null_blk.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/ipmi/ipmi_si_parisc.c
drivers/char/ipmi/ipmi_si_pci.c
drivers/clk/clk.c
drivers/clk/sunxi/clk-sun9i-mmc.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/imx6q-cpufreq.c
drivers/dma/at_hdmac.c
drivers/dma/dma-jz4740.c
drivers/dma/dmatest.c
drivers/dma/fsl-edma.c
drivers/dma/ioat/init.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
drivers/gpu/drm/drm_connector.c
drivers/gpu/drm/drm_crtc_internal.h
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_lease.c
drivers/gpu/drm/drm_mm.c
drivers/gpu/drm/drm_mode_config.c
drivers/gpu/drm/drm_plane.c
drivers/gpu/drm/drm_syncobj.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nouveau_drv.h
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nouveau_mem.c
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/nouveau/nouveau_vmm.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/hwtracing/stm/ftrace.c
drivers/i2c/busses/i2c-cht-wc.c
drivers/i2c/busses/i2c-piix4.c
drivers/i2c/busses/i2c-stm32.h
drivers/i2c/busses/i2c-stm32f4.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/device.c
drivers/infiniband/core/iwcm.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/security.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-mpath.c
drivers/md/dm-snap.c
drivers/md/dm-table.c
drivers/md/dm-thin.c
drivers/mfd/cros_ec_spi.c
drivers/mfd/twl4030-audio.c
drivers/mfd/twl6040.c
drivers/misc/eeprom/at24.c
drivers/misc/pti.c
drivers/mmc/core/card.h
drivers/mmc/core/mmc.c
drivers/mmc/core/quirks.h
drivers/mtd/mtdcore.c
drivers/mtd/nand/brcmnand/brcmnand.c
drivers/mtd/nand/gpio.c
drivers/mtd/nand/gpmi-nand/gpmi-nand.c
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
drivers/net/ethernet/aquantia/atlantic/aq_hw.h
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.h
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
drivers/net/ethernet/aquantia/atlantic/ver.h
drivers/net/ethernet/arc/emac.h
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/arc/emac_rockchip.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/skge.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_selftest.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/qualcomm/emac/emac-phy.c
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/hippi/rrunner.c
drivers/net/phy/at803x.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-xgene.c
drivers/net/phy/mdio_bus.c
drivers/net/phy/meson-gxl.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/usb/qmi_wwan.c
drivers/net/vxlan.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt.h
drivers/nvdimm/pfn_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/of/of_mdio.c
drivers/parisc/lba_pci.c
drivers/pci/host/pcie-rcar.c
drivers/pci/pci-driver.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/platform/x86/asus-wireless.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/dell-wmi.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l3.h
drivers/s390/net/qeth_l3_main.c
drivers/s390/net/qeth_l3_sys.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/commsup.c
drivers/scsi/aacraid/linit.c
drivers/scsi/bfa/bfad_bsg.c
drivers/scsi/bfa/bfad_im.c
drivers/scsi/bfa/bfad_im.h
drivers/scsi/libfc/fc_lport.c
drivers/scsi/libsas/sas_expander.c
drivers/scsi/lpfc/lpfc_mem.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/scsi_debugfs.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_spi.c
drivers/scsi/sd.c
drivers/spi/spi-armada-3700.c
drivers/spi/spi-atmel.c
drivers/spi/spi-rspi.c
drivers/spi/spi-sun4i.c
drivers/spi/spi-xilinx.c
drivers/staging/ccree/ssi_hash.c
drivers/staging/pi433/rf69.c
drivers/target/target_core_pscsi.c
drivers/usb/core/config.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/gadget.c
drivers/usb/dwc2/params.c
drivers/usb/dwc3/dwc3-of-simple.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/Kconfig
drivers/usb/gadget/legacy/Kconfig
drivers/usb/host/xhci-mem.c
drivers/usb/host/xhci-ring.c
drivers/usb/musb/da8xx.c
drivers/usb/storage/unusual_devs.h
drivers/usb/storage/unusual_uas.h
drivers/usb/usbip/stub_rx.c
drivers/usb/usbip/stub_tx.c
drivers/usb/usbip/usbip_common.h
drivers/usb/usbip/vhci_sysfs.c
drivers/virtio/virtio_mmio.c
drivers/xen/Kconfig
drivers/xen/balloon.c
fs/autofs4/waitq.c
fs/ceph/mds_client.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cramfs/Kconfig
fs/dax.c
fs/exec.c
fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/namespace.c
fs/nfs/client.c
fs/nfs/nfs4client.c
fs/nfs/write.c
fs/nfsd/auth.c
fs/overlayfs/Kconfig
fs/overlayfs/dir.c
fs/overlayfs/namei.c
fs/overlayfs/overlayfs.h
fs/overlayfs/ovl_entry.h
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/super.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_iext_tree.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/scrub/scrub.c
fs/xfs/scrub/trace.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_iomap.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
fs/xfs/xfs_trace.c
include/asm-generic/mm_hooks.h
include/asm-generic/pgtable.h
include/crypto/internal/hash.h
include/crypto/mcryptd.h
include/drm/drm_connector.h
include/drm/drm_edid.h
include/drm/drm_mode_config.h
include/kvm/arm_arch_timer.h
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/bpf_verifier.h
include/linux/compiler.h
include/linux/completion.h
include/linux/cred.h
include/linux/idr.h
include/linux/intel-pti.h [moved from include/linux/pti.h with 94% similarity]
include/linux/ipv6.h
include/linux/lockdep.h
include/linux/mfd/rtsx_pci.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/oom.h
include/linux/pci.h
include/linux/pm.h
include/linux/ptr_ring.h
include/linux/rbtree.h
include/linux/rwlock_types.h
include/linux/sched.h
include/linux/sched/coredump.h
include/linux/spi/spi.h
include/linux/spinlock.h
include/linux/spinlock_types.h
include/linux/string.h
include/linux/trace.h
include/net/cfg80211.h
include/net/gue.h
include/net/ip.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/trace/events/clk.h
include/trace/events/kvm.h
include/trace/events/preemptirq.h
include/uapi/linux/pkt_sched.h
include/uapi/linux/rtnetlink.h
include/xen/balloon.h
init/main.c
kernel/bpf/hashtab.c
kernel/bpf/verifier.c
kernel/cgroup/debug.c
kernel/cgroup/stat.c
kernel/exit.c
kernel/fork.c
kernel/groups.c
kernel/kcov.c
kernel/locking/lockdep.c
kernel/locking/spinlock.c
kernel/sched/core.c
kernel/sched/rt.c
kernel/time/posix-timers.c
kernel/trace/Kconfig
kernel/trace/bpf_trace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_stack.c
kernel/uid16.c
kernel/workqueue.c
lib/Kconfig.debug
lib/rbtree.c
lib/test_bpf.c
mm/backing-dev.c
mm/early_ioremap.c
mm/frame_vector.c
mm/gup.c
mm/hmm.c
mm/huge_memory.c
mm/kmemleak.c
mm/memory.c
mm/mmap.c
mm/oom_kill.c
mm/page_alloc.c
mm/percpu.c
mm/slab.c
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/fragmentation.c
net/batman-adv/tp_meter.c
net/bridge/br_netlink.c
net/core/dev.c
net/core/net_namespace.c
net/core/netprio_cgroup.c
net/core/skbuff.c
net/dsa/slave.c
net/ipv4/devinet.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/igmp.c
net/ipv4/ip_gre.c
net/ipv4/ip_tunnel.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/raw.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c
net/ipv6/af_inet6.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_MASQUERADE.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/mac80211/ht.c
net/netfilter/nf_conntrack_h323_asn1.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_exthdr.c
net/netfilter/x_tables.c
net/netfilter/xt_bpf.c
net/netfilter/xt_osf.c
net/netlink/af_netlink.c
net/openvswitch/flow.c
net/sched/act_meta_mark.c
net/sched/act_meta_skbtcindex.c
net/sched/cls_api.c
net/sched/cls_bpf.c
net/sched/cls_u32.c
net/sched/sch_api.c
net/sched/sch_ingress.c
net/sched/sch_red.c
net/sctp/debug.c
net/sctp/socket.c
net/sctp/ulpqueue.c
net/sunrpc/auth_gss/gss_rpc_xdr.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/svcauth_unix.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/tipc/group.c
net/tipc/socket.c
net/wireless/Makefile
net/wireless/certs/sforshee.hex [new file with mode: 0644]
net/wireless/certs/sforshee.x509 [deleted file]
net/wireless/nl80211.c
scripts/checkpatch.pl
scripts/faddr2line
sound/core/rawmidi.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/arch/s390/include/uapi/asm/bpf_perf_event.h
tools/arch/x86/include/asm/cpufeatures.h
tools/include/linux/compiler.h
tools/include/linux/lockdep.h
tools/include/uapi/asm/bpf_perf_event.h [new file with mode: 0644]
tools/include/uapi/linux/kvm.h
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/objtool/arch/x86/lib/x86-opcode-map.txt
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
tools/perf/util/mmap.h
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/net/config
tools/testing/selftests/x86/ldt_gdt.c
tools/usb/usbip/libsrc/vhci_driver.c
tools/virtio/ringtest/ptr_ring.c
tools/vm/slabinfo-gnuplot.sh
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmio.c
virt/kvm/arm/mmu.c

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 304bf22bb83cc0ec8dfbbcf2a48b206ecb781afb..fc1c884fea10497357f889b11e33c6d323fecf55 100644 (file)
@@ -75,3 +75,4 @@ stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
+| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 779211fbb69ffac450f22b0ad6864c7c6c2bd98f..2cddab7efb20df0dcf07c4d7d64a5611138a8d7c 100644 (file)
@@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for
 normal scheduling policy and absolute bandwidth allocation model for
 realtime scheduling policy.
 
+WARNING: cgroup2 doesn't yet support control of realtime processes and
+the cpu controller can only be enabled when all RT processes are in
+the root cgroup.  Be aware that system management software may already
+have placed RT processes into nonroot cgroups during the system boot
+process, and these processes may need to be moved to the root cgroup
+before the cpu controller can be enabled.
+
 
 CPU Interface Files
 ~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 376fa2f50e6bc9b41052928037acd4b3a382d380..956bb046e599d576e3f881b2901e0d369a3c9802 100644 (file)
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index 5bf13960f7f4a3c826c10b1e15a618df82d82403..e3c48b20b1a691b37d0b425251a257c682a38eca 100644 (file)
@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
        Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-               Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 8caa60734647f70a777b8a568ba9f2dd99182fb8..e6a5f4912b6d4a4910ed1d2fc494fff304ab47ff 100644 (file)
@@ -156,6 +156,40 @@ handle it in two different ways:
    root of the overlay.  Finally the directory is moved to the new
    location.
 
+There are several ways to tune the "redirect_dir" feature.
+
+Kernel config options:
+
+- OVERLAY_FS_REDIRECT_DIR:
+    If this is enabled, then redirect_dir is turned on by default.
+- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
+    If this is enabled, then redirects are always followed by default. Enabling
+    this results in a less secure configuration.  Enable this option only when
+    worried about backward compatibility with kernels that have the redirect_dir
+    feature and follow redirects even if turned off.
+
+Module options (can also be changed through /sys/module/overlay/parameters/*):
+
+- "redirect_dir=BOOL":
+    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
+- "redirect_always_follow=BOOL":
+    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
+- "redirect_max=NUM":
+    The maximum number of bytes in an absolute redirect (default is 256).
+
+Mount options:
+
+- "redirect_dir=on":
+    Redirects are enabled.
+- "redirect_dir=follow":
+    Redirects are not created, but followed.
+- "redirect_dir=off":
+    Redirects are not created and only followed if "redirect_always_follow"
+    feature is enabled in the kernel/module config.
+- "redirect_dir=nofollow":
+    Redirects are not created and not followed (equivalent to "redirect_dir=off"
+    if "redirect_always_follow" feature is not enabled).
+
 Non-directories
 ---------------
 
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
deleted file mode 100644 (file)
index bdf1423..0000000
+++ /dev/null
@@ -1,874 +0,0 @@
-Crossrelease
-============
-
-Started by Byungchul Park <byungchul.park@lge.com>
-
-Contents:
-
- (*) Background
-
-     - What causes deadlock
-     - How lockdep works
-
- (*) Limitation
-
-     - Limit lockdep
-     - Pros from the limitation
-     - Cons from the limitation
-     - Relax the limitation
-
- (*) Crossrelease
-
-     - Introduce crossrelease
-     - Introduce commit
-
- (*) Implementation
-
-     - Data structures
-     - How crossrelease works
-
- (*) Optimizations
-
-     - Avoid duplication
-     - Lockless for hot paths
-
- (*) APPENDIX A: What lockdep does to work aggressively
-
- (*) APPENDIX B: How to avoid adding false dependencies
-
-
-==========
-Background
-==========
-
-What causes deadlock
---------------------
-
-A deadlock occurs when a context is waiting for an event to happen,
-which is impossible because another (or the) context that can trigger the
-event is also waiting for another (or the) event to happen, which is
-also impossible for the same reason.
-
-For example:
-
-   A context going to trigger event C is waiting for event A to happen.
-   A context going to trigger event A is waiting for event B to happen.
-   A context going to trigger event B is waiting for event C to happen.
-
-A deadlock occurs when these three wait operations run at the same time,
-because event C cannot be triggered if event A does not happen, which in
-turn cannot be triggered if event B does not happen, which in turn
-cannot be triggered if event C does not happen. After all, no event can
-be triggered since any of them never meets its condition to wake up.
-
-A dependency might exist between two waiters and a deadlock might happen
-due to an incorrect relationship between dependencies. Thus, we must
-define what a dependency is first. A dependency exists between them if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-Each wait in the example creates its dependency like:
-
-   Event C depends on event A.
-   Event A depends on event B.
-   Event B depends on event C.
-
-   NOTE: Precisely speaking, a dependency is one between whether a
-   waiter for an event can be woken up and whether another waiter for
-   another event can be woken up. However from now on, we will describe
-   a dependency as if it's one between an event and another event for
-   simplicity.
-
-And they form circular dependencies like:
-
-    -> C -> A -> B -
-   /                \
-   \                /
-    ----------------
-
-   where 'A -> B' means that event A depends on event B.
-
-Such circular dependencies lead to a deadlock since no waiter can meet
-its condition to wake up as described.
-
-CONCLUSION
-
-Circular dependencies cause a deadlock.
-
-
-How lockdep works
------------------
-
-Lockdep tries to detect a deadlock by checking dependencies created by
-lock operations, acquire and release. Waiting for a lock corresponds to
-waiting for an event, and releasing a lock corresponds to triggering an
-event in the previous section.
-
-In short, lockdep does:
-
-   1. Detect a new dependency.
-   2. Add the dependency into a global graph.
-   3. Check if that makes dependencies circular.
-   4. Report a deadlock or its possibility if so.
-
-For example, consider a graph built by lockdep that looks like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-Lockdep will add a dependency into the graph on detection of a new
-dependency. For example, it will add a dependency 'E -> C' when a new
-dependency between lock E and lock C is detected. Then the graph will be:
-
-       A -> B -
-               \
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where A, B,..., E are different lock classes.
-
-This graph contains a subgraph which demonstrates circular dependencies:
-
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where C, D and E are different lock classes.
-
-This is the condition under which a deadlock might occur. Lockdep
-reports it on detection after adding a new dependency. This is how
-lockdep works.
-
-CONCLUSION
-
-Lockdep detects a deadlock or its possibility by checking if circular
-dependencies were created after adding each new dependency.
-
-
-==========
-Limitation
-==========
-
-Limit lockdep
--------------
-
-By limiting lockdep to work only on typical locks, e.g. spin locks and
-mutexes, which are released within the acquire context, the
-implementation becomes simple but its capacity for detection becomes
-limited. Let's check the pros and cons in the next sections.
-
-
-Pros from the limitation
-------------------------
-
-Given the limitation, when a context has to wait to acquire a lock, the
-locks in its held_locks cannot be released until that acquisition
-succeeds, which means all waiters for the locks in the held_locks are
-stuck. This is exactly the case that creates dependencies between each
-lock in the held_locks and the lock to acquire.
-
-For example:
-
-   CONTEXT X
-   ---------
-   acquire A
-   acquire B /* Add a dependency 'A -> B' */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-When acquiring lock A, the held_locks of CONTEXT X is empty thus no
-dependency is added. But when acquiring lock B, lockdep detects and adds
-a new dependency 'A -> B' between lock A in the held_locks and lock B.
-They can be simply added whenever acquiring each lock.
-
-And data required by lockdep exists in a local structure, held_locks
-embedded in task_struct. By forcing the data to be accessed only within
-the owning context, lockdep can avoid races without explicit locks while
-handling the local data.
-
-Lastly, lockdep only needs to keep locks currently being held, to build
-a dependency graph. However, relaxing the limitation, it needs to keep
-even locks already released, because a decision whether they created
-dependencies might be long-deferred.
-
-To sum up, we can expect several advantages from the limitation:
-
-   1. Lockdep can easily identify a dependency when acquiring a lock.
-   2. Races are avoidable while accessing local locks in a held_locks.
-   3. Lockdep only needs to keep locks currently being held.
-
-CONCLUSION
-
-Given the limitation, the implementation becomes simple and efficient.
-
-
-Cons from the limitation
-------------------------
-
-Given the limitation, lockdep is applicable only to typical locks. For
-example, page locks for page access or completions for synchronization
-cannot work with lockdep.
-
-Can we detect deadlocks below, under the limitation?
-
-Example 1:
-
-   CONTEXT X      CONTEXT Y       CONTEXT Z
-   ---------      ---------       ----------
-                  mutex_lock A
-   lock_page B
-                  lock_page B
-                                  mutex_lock A /* DEADLOCK */
-                                  unlock_page B held by X
-                  unlock_page B
-                  mutex_unlock A
-                                  mutex_unlock A
-
-   where A and B are different lock classes.
-
-No, we cannot.
-
-Example 2:
-
-   CONTEXT X              CONTEXT Y
-   ---------              ---------
-                          mutex_lock A
-   mutex_lock A
-                          wait_for_complete B /* DEADLOCK */
-   complete B
-                          mutex_unlock A
-   mutex_unlock A
-
-   where A is a lock class and B is a completion variable.
-
-No, we cannot.
-
-CONCLUSION
-
-Given the limitation, lockdep cannot detect a deadlock or its
-possibility caused by page locks or completions.
-
-
-Relax the limitation
---------------------
-
-Under the limitation, things to create dependencies are limited to
-typical locks. However, synchronization primitives like page locks and
-completions, which are allowed to be released in any context, also
-create dependencies and can cause a deadlock. So lockdep should track
-these locks to do a better job. We have to relax the limitation for
-these locks to work with lockdep.
-
-Detecting dependencies is very important for lockdep to work because
-adding a dependency means adding an opportunity to check whether it
-causes a deadlock. The more dependencies lockdep adds, the more
-thoroughly it works. Thus lockdep has to do its best to detect and add
-as many true dependencies into the graph as possible.
-
-For example, considering only typical locks, lockdep builds a graph like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-On the other hand, under the relaxation, additional dependencies might
-be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
-added thanks to the relaxation, the graph will be:
-
-         A -> B -
-                 \
-                  -> E -> GX
-                 /
-   FX -> C -> D -
-
-   where A, B,..., E, FX and GX are different lock classes, and a suffix
-   'X' is added on non-typical locks.
-
-The latter graph gives us more chances to check circular dependencies
-than the former. However, performance might degrade, since relaxing the
-limitation that keeps lockdep's design and implementation efficient
-inevitably introduces some inefficiency. So lockdep should provide two
-options, strong detection and efficient detection.
-
-Choosing efficient detection:
-
-   Lockdep works with only locks restricted to be released within the
-   acquire context. However, lockdep works efficiently.
-
-Choosing strong detection:
-
-   Lockdep works with all synchronization primitives. However, lockdep
-   suffers performance degradation.
-
-CONCLUSION
-
-Relaxing the limitation, lockdep can add additional dependencies giving
-additional opportunities to check circular dependencies.
-
-
-============
-Crossrelease
-============
-
-Introduce crossrelease
-----------------------
-
-In order to allow lockdep to handle additional dependencies by what
-might be released in any context, namely 'crosslock', we have to be able
-to identify those created by crosslocks. The proposed 'crossrelease'
-feature provides a way to do that.
-
-Crossrelease feature has to do:
-
-   1. Identify dependencies created by crosslocks.
-   2. Add the dependencies into a dependency graph.
-
-That's all. Once a meaningful dependency is added into the graph, then
-lockdep would work with the graph as it did. The most important thing
-crossrelease feature has to do is to correctly identify and add true
-dependencies into the global graph.
-
-A dependency e.g. 'A -> B' can be identified only in the A's release
-context because a decision required to identify the dependency can be
-made only in the release context. That is to decide whether A can be
-released so that a waiter for A can be woken up. It cannot be made in
-other than the A's release context.
-
-This is not a problem for typical locks because each acquire context is
-the same as its release context, thus lockdep can decide whether a lock
-can be released in the acquire context. However, for crosslocks, lockdep
-cannot make the decision in the acquire context but has to wait until
-the release context is identified.
-
-Therefore, a deadlock involving crosslocks cannot be detected the moment
-it happens, because the dependencies cannot be identified until the
-crosslocks are released. However, deadlock possibilities can still be
-detected, and that is very worthwhile. See the 'APPENDIX A' section for why.
-
-CONCLUSION
-
-Using crossrelease feature, lockdep can work with what might be released
-in any context, namely crosslock.
-
-
-Introduce commit
-----------------
-
-Since crossrelease defers the work adding true dependencies of
-crosslocks until they are actually released, crossrelease has to queue
-all acquisitions which might create dependencies with the crosslocks.
-Then it identifies dependencies using the queued data in batches at a
-proper time. We call it 'commit'.
-
-There are four types of dependencies:
-
-1. TT type: 'typical lock A -> typical lock B'
-
-   Just when acquiring B, lockdep can see it's in the A's release
-   context. So the dependency between A and B can be identified
-   immediately. Commit is unnecessary.
-
-2. TC type: 'typical lock A -> crosslock BX'
-
-   Just when acquiring BX, lockdep can see it's in the A's release
-   context. So the dependency between A and BX can be identified
-   immediately. Commit is unnecessary, too.
-
-3. CT type: 'crosslock AX -> typical lock B'
-
-   When acquiring B, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-
-4. CC type: 'crosslock AX -> crosslock BX'
-
-   When acquiring BX, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-   But, handling CC type is not implemented yet. It's a future work.
-
-Lockdep can work without commit for typical locks, but commit step is
-necessary once crosslocks are involved. Introducing commit, lockdep
-performs three steps. What lockdep does in each step is:
-
-1. Acquisition: For typical locks, lockdep does what it originally did
-   and queues the lock so that CT type dependencies can be checked using
-   it at the commit step. For crosslocks, it saves data which will be
-   used at the commit step and increases a reference count for it.
-
-2. Commit: No action is required for typical locks. For crosslocks,
-   lockdep adds CT type dependencies using the data saved at the
-   acquisition step.
-
-3. Release: No changes are required for typical locks. When a crosslock
-   is released, it decreases a reference count for it.
-
-CONCLUSION
-
-Crossrelease introduces commit step to handle dependencies of crosslocks
-in batches at a proper time.
-
-
-==============
-Implementation
-==============
-
-Data structures
----------------
-
-Crossrelease introduces two main data structures.
-
-1. hist_lock
-
-   This is an array embedded in task_struct, for keeping lock history so
-   that dependencies can be added using them at the commit step. Since
-   it's local data, it can be accessed locklessly in the owner context.
-   The array is filled at the acquisition step and consumed at the
-   commit step. And it's managed in a circular manner.
-
-2. cross_lock
-
-   One per lockdep_map exists. This is for keeping data of crosslocks
-   and used at the commit step.
-
-
-How crossrelease works
-----------------------
-
-The key to how crossrelease works is to defer the necessary work to an
-appropriate point in time and perform it at once at the commit step.
-Let's take a look with examples step by step, starting from how lockdep
-works without crossrelease for typical locks.
-
-   acquire A /* Push A onto held_locks */
-   acquire B /* Push B onto held_locks and add 'A -> B' */
-   acquire C /* Push C onto held_locks and add 'B -> C' */
-   release C /* Pop C from held_locks */
-   release B /* Pop B from held_locks */
-   release A /* Pop A from held_locks */
-
-   where A, B and C are different lock classes.
-
-   NOTE: This document assumes that readers already understand how
-   lockdep works without crossrelease thus omits details. But there's
-   one thing to note. Lockdep pretends to pop a lock from held_locks
-   when releasing it. But it's subtly different from the original pop
-   operation because lockdep allows entries other than the top to be popped.
-
-In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
-dependency every time acquiring a lock.
-
-After adding 'A -> B', a dependency graph will be:
-
-   A -> B
-
-   where A and B are different lock classes.
-
-And after adding 'B -> C', the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-Let's perform the commit step even for typical locks to add dependencies.
-Of course, the commit step is not necessary for them; however, it works
-well because this is a more general way.
-
-   acquire A
-   /*
-    * Queue A into hist_locks
-    *
-    * In hist_locks: A
-    * In graph: Empty
-    */
-
-   acquire B
-   /*
-    * Queue B into hist_locks
-    *
-    * In hist_locks: A, B
-    * In graph: Empty
-    */
-
-   acquire C
-   /*
-    * Queue C into hist_locks
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   commit C
-   /*
-    * Add 'C -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire C: Nothing
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   release C
-
-   commit B
-   /*
-    * Add 'B -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire B: C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C'
-    */
-
-   release B
-
-   commit A
-   /*
-    * Add 'A -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire A: B, C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C', 'A -> B', 'A -> C'
-    */
-
-   release A
-
-   where A, B and C are different lock classes.
-
-In this case, dependencies are added at the commit step as described.
-
-After commits for A, B and C, the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-   NOTE: A dependency 'A -> C' is optimized out.
-
-We can see the former graph, built without the commit step, is the same
-as the latter graph built using commit steps. Of course the former way
-finishes building the graph earlier, which means we can detect a
-deadlock or its possibility sooner. So the former way would be preferred
-when possible. But we cannot avoid using the latter way for crosslocks.
-
-Let's look at how commit steps work for crosslocks. In this case, the
-commit step is actually performed only on crosslock BX. And it assumes
-that the BX release context is different from the BX acquire context.
-
-   BX RELEASE CONTEXT             BX ACQUIRE CONTEXT
-   ------------------             ------------------
-                                  acquire A
-                                  /*
-                                   * Push A onto held_locks
-                                   * Queue A into hist_locks
-                                   *
-                                   * In held_locks: A
-                                   * In hist_locks: A
-                                   * In graph: Empty
-                                   */
-
-                                  acquire BX
-                                  /*
-                                   * Add 'the top of held_locks -> BX'
-                                   *
-                                   * In held_locks: A
-                                   * In hist_locks: A
-                                   * In graph: 'A -> BX'
-                                   */
-
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-   It must be guaranteed that the following operations are seen after
-   acquiring BX globally. It can be done by things like barrier.
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-   acquire C
-   /*
-    * Push C onto held_locks
-    * Queue C into hist_locks
-    *
-    * In held_locks: C
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-
-   release C
-   /*
-    * Pop C from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-                                  acquire D
-                                  /*
-                                   * Push D onto held_locks
-                                   * Queue D into hist_locks
-                                   * Add 'the top of held_locks -> D'
-                                   *
-                                   * In held_locks: A, D
-                                   * In hist_locks: A, D
-                                   * In graph: 'A -> BX', 'A -> D'
-                                   */
-   acquire E
-   /*
-    * Push E onto held_locks
-    * Queue E into hist_locks
-    *
-    * In held_locks: E
-    * In hist_locks: C, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-
-   release E
-   /*
-    * Pop E from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-                                  release D
-                                  /*
-                                   * Pop D from held_locks
-                                   *
-                                   * In held_locks: A
-                                   * In hist_locks: A, D
-                                   * In graph: 'A -> BX', 'A -> D'
-                                   */
-   commit BX
-   /*
-    * Add 'BX -> ?'
-    * What has been queued since acquire BX: C, E
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-
-   release BX
-   /*
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-                                  release A
-                                  /*
-                                   * Pop A from held_locks
-                                   *
-                                   * In held_locks: Empty
-                                   * In hist_locks: A, D
-                                   * In graph: 'A -> BX', 'A -> D',
-                                   *           'BX -> C', 'BX -> E'
-                                   */
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Crossrelease considers all acquisitions after acquiring BX to be
-candidates which might create dependencies with BX. True dependencies
-will be determined when identifying the release context of BX. Meanwhile,
-all typical locks are queued so that they can be used at the commit step.
-And then two dependencies 'BX -> C' and 'BX -> E' are added at the
-commit step when identifying the release context.
-
-The final graph will be, with crossrelease:
-
-               -> C
-              /
-       -> BX -
-      /       \
-   A -         -> E
-      \
-       -> D
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-However, the final graph will be, without crossrelease:
-
-   A -> D
-
-   where A and D are different lock classes.
-
-The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
-'BX -> E' giving additional opportunities to check if they cause
-deadlocks. This way lockdep can detect a deadlock or its possibility
-caused by crosslocks.
-
-CONCLUSION
-
-We checked how crossrelease works with several examples.
-
-
-=============
-Optimizations
-=============
-
-Avoid duplication
------------------
-
-Crossrelease feature uses a cache like what lockdep already uses for
-dependency chains, but this time it's for caching CT type dependencies.
-Once that dependency is cached, the same will never be added again.
-
-
-Lockless for hot paths
-----------------------
-
-To keep all locks for later use at the commit step, crossrelease adopts
-a local array embedded in task_struct, which makes access to the data
-lockless by forcing it to happen only within the owner context. It's
-like how lockdep handles held_locks. A lockless implementation is important
-since typical locks are very frequently acquired and released.
-
-
-==================================================
-APPENDIX A: What lockdep does to work aggressively
-==================================================
-
-A deadlock actually occurs when all wait operations creating circular
-dependencies run at the same time. Even if they don't, a potential
-deadlock exists if the problematic dependencies exist. Thus it's
-meaningful to detect not only an actual deadlock but also its
-possibility. The latter is rather valuable. When a deadlock actually
-occurs, we can identify what happens in the system by some means or
-other even without lockdep. However, there's no way to detect the
-possibility without lockdep unless the whole code is analyzed by hand,
-which is terrible. Lockdep does both; crossrelease focuses only on the latter.
-
-Whether or not a deadlock actually occurs depends on several factors.
-For example, what order contexts are switched in is a factor. Assuming
-circular dependencies exist, a deadlock would occur when contexts are
-switched so that all wait operations creating the dependencies run
-simultaneously. Thus to detect a deadlock possibility even in the case
-that it has not occurred yet, lockdep should consider all possible
-combinations of dependencies, trying to:
-
-1. Use a global dependency graph.
-
-   Lockdep combines all dependencies into one global graph and uses them,
-   regardless of which context generates them or what order contexts are
-   switched in. Only aggregated dependencies are considered, so they are
-   prone to be circular if a problem exists.
-
-2. Check dependencies between classes instead of instances.
-
-   What actually causes a deadlock are instances of lock. However,
-   lockdep checks dependencies between classes instead of instances.
-   This way lockdep can detect a deadlock which has not happened yet but
-   might happen in the future with other instances of the same class.
-
-3. Assume all acquisitions lead to waiting.
-
-   Although locks might be acquired without waiting which is essential
-   to create dependencies, lockdep assumes all acquisitions lead to
-   waiting since it might be true some time or another.
-
-CONCLUSION
-
-Lockdep detects not only an actual deadlock but also its possibility,
-and the latter is more valuable.
-
-
-==================================================
-APPENDIX B: How to avoid adding false dependencies
-==================================================
-
-Recall what a dependency is. A dependency exists if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-For example:
-
-   acquire A
-   acquire B /* A dependency 'A -> B' exists */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-A dependency 'A -> B' exists since:
-
-   1. A waiter for A and a waiter for B might exist when acquiring B.
-   2. The only way to wake up each is to release what it waits for.
-   3. Whether the waiter for A can be woken up depends on whether the
-      other can. IOW, TASK X cannot release A if it fails to acquire B.
-
-For another example:
-
-   TASK X                         TASK Y
-   ------                         ------
-                                  acquire AX
-   acquire B /* A dependency 'AX -> B' exists */
-   release B
-   release AX held by Y
-
-   where AX and B are different lock classes, and a suffix 'X' is added
-   on crosslocks.
-
-Even in this case involving crosslocks, the same rule can be applied. A
-dependency 'AX -> B' exists since:
-
-   1. A waiter for AX and a waiter for B might exist when acquiring B.
-   2. The only way to wake up each is to release what it waits for.
-   3. Whether the waiter for AX can be woken up depends on whether the
-      other can. IOW, TASK X cannot release AX if it fails to acquire B.
-
-Let's take a look at a more complicated example:
-
-   TASK X                         TASK Y
-   ------                         ------
-   acquire B
-   release B
-   fork Y
-                                  acquire AX
-   acquire C /* A dependency 'AX -> C' exists */
-   release C
-   release AX held by Y
-
-   where AX, B and C are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Does a dependency 'AX -> B' exist? Nope.
-
-Two waiters are essential to create a dependency. However, waiters for
-AX and B to create 'AX -> B' cannot exist at the same time in this
-example. Thus the dependency 'AX -> B' cannot be created.
-
-It would be ideal if the full set of true dependencies could be
-considered. But we can be sure of nothing but what actually happened.
-Relying on what actually happens at runtime, we can still add only true
-ones, though they might be a subset of all the true ones. It's similar
-to how lockdep works for typical locks. There might be more true
-dependencies than what lockdep has detected at runtime. Lockdep has no
-choice but to rely on what actually happens. Crossrelease also relies on it.
-
-CONCLUSION
-
-Relying on what actually happens, lockdep can avoid adding false
-dependencies.
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
index 89fff7d611ccb533a5c3d375bc94fecf3c2e0687..0b3a1148f9f0414558ed0537b4219225162ccc3a 100644 (file)
@@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its
 original compressor.  Once all pages are removed from an old zpool, the zpool
 and its compressor are freed.
 
+Some of the pages in zswap are same-value filled pages (i.e. the contents of
+the page have the same value or a repetitive pattern). These pages include
+zero-filled pages and they are handled differently. During a store operation,
+a page is checked to see if it is a same-value filled page before compressing
+it. If true, the compressed length of the page is set to zero and the pattern
+or same-filled value is stored.
+
+Same-value filled pages identification feature is enabled by default and can be
+disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
+e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
+runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
+
+echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+
+When zswap same-filled page identification is disabled at runtime, it will stop
+checking for the same-value filled pages during store operation. However, the
+existing pages which are marked as same-value filled pages remain stored
+unchanged in zswap until they are either loaded or invalidated.
+
 A debugfs interface is provided for various statistics about pool size, number
-of pages stored, and various counters for the reasons pages are rejected.
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 3448e675b4623ce81b5e0bc1116c52a12c411801..51101708a03ae1c22ad4a16c4b750a2af165a521 100644 (file)
@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes a hole between user space and kernel addresses if you interpret them
+as unsigned.
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +72,3 @@ following fixmap section.
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
-
--Andi Kleen, Jul 2004
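
To make the sign-extension rule described above concrete, here is a minimal
user-space sketch (illustrative only, not part of the patch; the helper name is
made up) that tests whether a 64-bit value is a canonical 48-bit virtual
address, i.e. whether bits 63..47 are all copies of bit 47:

	#include <stdbool.h>
	#include <stdint.h>

	static bool is_canonical_48(uint64_t addr)
	{
		/*
		 * Shift bit 47 up to bit 63, then arithmetic-shift back down
		 * so bit 47 is replicated into bits 63..48.
		 */
		int64_t sign_extended = (int64_t)(addr << 16) >> 16;

		return (uint64_t)sign_extended == addr;
	}

Any address failing this check falls into the unused hole between the user and
kernel halves of the map.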
index 82ad0eabce4f3e3955dfab54c7df2cae03644268..a6e86e20761e143ca976d4f8170e60b603bb5ed9 100644 (file)
@@ -5431,7 +5431,7 @@ F:        drivers/media/tuners/fc2580*
 
 FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
 M:     Johannes Thumshirn <jth@kernel.org>
-L:     fcoe-devel@open-fcoe.org
+L:     linux-scsi@vger.kernel.org
 W:     www.Open-FCoE.org
 S:     Supported
 F:     drivers/scsi/libfc/
@@ -13117,6 +13117,7 @@ F:      drivers/dma/dw/
 
 SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
 M:     Jie Deng <jiedeng@synopsys.com>
+M:     Jose Abreu <Jose.Abreu@synopsys.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/synopsys/
index 3f4d157add54018fbea707a8129f2326b463dec6..ac8c441866b70d0b447b37ef3746374a4040849e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc5
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index fbb3758ca2e3e77d4a65e47bb9229429305a0015..4b8edc8982cf156931177b0ecce0f6b9329afce3 100644 (file)
                                        switch0port10: port@10 {
                                                reg = <10>;
                                                label = "dsa";
-                                               phy-mode = "xgmii";
+                                               phy-mode = "xaui";
                                                link = <&switch1port10>;
                                        };
                                };
                                        switch1port10: port@10 {
                                                reg = <10>;
                                                label = "dsa";
-                                               phy-mode = "xgmii";
+                                               phy-mode = "xaui";
                                                link = <&switch0port10>;
                                        };
                                };
index 1712f132b80d2402d94d72ea974a0c3326fa2f52..b83fdc06286a64ece150fb7e419bc587e47c3e34 100644 (file)
                .pushsection .text.fixup,"ax"
                .align  4
 9001:          mov     r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+               ldr     r5, [sp, #9*4]          @ *err_ptr
+#else
                ldr     r5, [sp, #8*4]          @ *err_ptr
+#endif
                str     r4, [r5]
                ldmia   sp, {r1, r2}            @ retrieve dst, len
                add     r2, r2, r1
index a93339f5178f2eff247144eb9244c077225094bc..c9a7e9e1414f344c9dfd515600e3e4378bf61d81 100644 (file)
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
 
          If unsure, say Y.
 
-
 config SOCIONEXT_SYNQUACER_PREITS
        bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
        default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
           a 128kB offset to be applied to the target address in these commands.
 
          If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+       bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+       default y
+       help
+         The Falkor CPU may speculatively fetch instructions from an improper
+         memory location when the MMU translation is changed from
+         SCTLR_ELn[M]=1 to SCTLR_ELn[M]=0. Prefix the MSR write with an ISB
+         instruction to work around the problem.
+
+         If unsure, say Y.
+
 endmenu
 
 
index aef72d886677758c76d6b932c863893df7c67b53..8b168280976f25de43539ed1b4dbed9b952fcfde 100644 (file)
@@ -512,4 +512,14 @@ alternative_else_nop_endif
 #endif
        .endm
 
+/**
+ * Errata workaround prior to disabling the MMU. Insert an ISB immediately
+ * prior to executing the MSR that will change SCTLR_ELn[M] from 1 to 0.
+ */
+       .macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+       isb
+#endif
+       .endm
+
 #endif /* __ASM_ASSEMBLER_H */
index ac67cfc2585a8af417405958779b792d60b06e6a..060e3a4008abd18e4a5aa48bb5b6fa1674a735c3 100644 (file)
@@ -60,6 +60,9 @@ enum ftr_type {
 #define FTR_VISIBLE    true    /* Feature visible to the user space */
 #define FTR_HIDDEN     false   /* Feature is hidden from the user */
 
+#define FTR_VISIBLE_IF_IS_ENABLED(config)              \
+       (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
 struct arm64_ftr_bits {
        bool            sign;   /* Value is signed ? */
        bool            visible;
index 235e77d982610a0114f62cc833278994cc2e76b2..cbf08d7cbf3089949bb4ada755a36383e6e5db2e 100644 (file)
@@ -91,6 +91,7 @@
 #define BRCM_CPU_PART_VULCAN           0x516
 
 #define QCOM_CPU_PART_FALKOR_V1                0x800
+#define QCOM_CPU_PART_FALKOR           0xC00
 
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
 
 #ifndef __ASSEMBLY__
 
index 149d05fb9421520bd659b62627941ed36ce46bb3..bdcc7f1c9d069df3d95c6884b8071cdba530cfc8 100644 (file)
@@ -42,6 +42,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
 extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-       return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+       pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+       pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+       return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-       return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+       pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+       if (pte_write(pte))
+               pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+       return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
        }
 }
 
-struct mm_struct;
-struct vm_area_struct;
-
 extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 
 /*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         * hardware updates of the pte (ptep_set_access_flags safely changes
         * valid ptes without going through an invalid entry).
         */
-       if (pte_valid(*ptep) && pte_valid(pte)) {
+       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+          (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
                VM_WARN_ONCE(!pte_young(pte),
                             "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
                             __func__, pte_val(*ptep), pte_val(pte));
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while transferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
        pte_t old_pte, pte;
 
-       /*
-        * ptep_set_wrprotect() is only called on CoW mappings which are
-        * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
-        * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
-        * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
-        * protection_map[]. There is no race with the hardware update of the
-        * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
-        * is set.
-        */
-       VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
-                    "%s: potential race with hardware DBM", __func__);
        pte = READ_ONCE(*ptep);
        do {
                old_pte = pte;
+               /*
+                * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+                * clear), set the PTE_DIRTY bit.
+                */
+               if (pte_hw_dirty(pte))
+                       pte = pte_mkdirty(pte);
                pte = pte_wrprotect(pte);
                pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
                                               pte_val(old_pte), pte_val(pte));
index 65f42d2574142d4b37bebf49ed1fc3cdccbb56ae..2a752cb2a0f35a82f2a60e744d160af9b5f6c6a1 100644 (file)
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
        mrs     x12, sctlr_el1
        ldr     x13, =SCTLR_ELx_FLAGS
        bic     x12, x12, x13
+       pre_disable_mmu_workaround
        msr     sctlr_el1, x12
        isb
 
index c5ba0097887f93e9d30b37355b84e5750d74d04e..a73a5928f09b26ae7b2de7b3c2217e5c975de4a0 100644 (file)
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                                  FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
        S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
        S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
index 4e6ad355bd058e6a4ab73a0f94832a7b1fe719a6..6b9736c3fb5630ab31c17b662b5c5cfe2b7d0832 100644 (file)
@@ -96,6 +96,7 @@ ENTRY(entry)
        mrs     x0, sctlr_el2
        bic     x0, x0, #1 << 0 // clear SCTLR.M
        bic     x0, x0, #1 << 2 // clear SCTLR.C
+       pre_disable_mmu_workaround
        msr     sctlr_el2, x0
        isb
        b       2f
@@ -103,6 +104,7 @@ ENTRY(entry)
        mrs     x0, sctlr_el1
        bic     x0, x0, #1 << 0 // clear SCTLR.M
        bic     x0, x0, #1 << 2 // clear SCTLR.C
+       pre_disable_mmu_workaround
        msr     sctlr_el1, x0
        isb
 2:
index 540a1e010eb519a6223e091d5f9f00b716fc2c7b..fae81f7964b4f226242961607cb087a20710e22b 100644 (file)
@@ -1043,7 +1043,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
        local_bh_disable();
 
-       current->thread.fpsimd_state = *state;
+       current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
        if (system_supports_sve() && test_thread_flag(TIF_SVE))
                fpsimd_to_sve(current);
 
index 67e86a0f57ac43edcee10d89bd5db2e050ae1621..e3cb9fbf96b66c3ba2d4327d4c1a4b3ca734ef1f 100644 (file)
@@ -750,6 +750,7 @@ __primary_switch:
         * to take into account by discarding the current kernel mapping and
         * creating a new one.
         */
+       pre_disable_mmu_workaround
        msr     sctlr_el1, x20                  // disable the MMU
        isb
        bl      __create_page_tables            // recreate kernel mapping
index 749f81779420c7bab2ead0d8f5b9a6cf108e6a45..74bb56f656eff024839df19897ba06512128e9bb 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/smp.h>
+#include <linux/uaccess.h>
 
 #include <asm/compat.h>
 #include <asm/current.h>
@@ -36,7 +37,6 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
-#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
index ce704a4aeadd438bf637472bb7037b89fba15087..f407e422a7200b86072349cc70e1e6d5e7e1753b 100644 (file)
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
        mrs     x0, sctlr_el2
        ldr     x1, =SCTLR_ELx_FLAGS
        bic     x0, x0, x1
+       pre_disable_mmu_workaround
        msr     sctlr_el2, x0
        isb
 1:
index 3f9615582377661a88fab8be6a12365d625d830a..870828c364c508f825eacc1c49c17886dc9c8cb2 100644 (file)
@@ -151,6 +151,7 @@ reset:
        mrs     x5, sctlr_el2
        ldr     x6, =SCTLR_ELx_FLAGS
        bic     x5, x5, x6              // Clear SCTL_M and etc
+       pre_disable_mmu_workaround
        msr     sctlr_el2, x5
        isb
 
index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..f4363d40e2cd7fd62d40d826d5296c95f15cde9f 100644 (file)
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
        u64 reg;
 
+       /* Clear pmscr in case of early return */
+       *pmscr_el1 = 0;
+
        /* SPE present on this CPU? */
        if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
                                                  ID_AA64DFR0_PMSVER_SHIFT))
index ca74a2aace425b95ed95ecf0e70a78188621004e..7b60d62ac5939e83c8e153ec1c3a0447565f23eb 100644 (file)
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
                .check_wx = true,
        };
 
-       walk_pgd(&st, &init_mm, 0);
+       walk_pgd(&st, &init_mm, VA_START);
        note_page(&st, 0, 0, 0);
        if (st.wx_pages || st.uxn_pages)
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
index 22168cd0dde73e06698bc40b166867df17a00134..9b7f89df49dbfe108da2eadc59421ce99a4432b4 100644 (file)
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
        struct siginfo info;
        const struct fault_info *inf;
-       int ret = 0;
 
        inf = esr_to_fault_info(esr);
        pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                if (interrupts_enabled(regs))
                        nmi_enter();
 
-               ret = ghes_notify_sea();
+               ghes_notify_sea();
 
                if (interrupts_enabled(regs))
                        nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                info.si_addr  = (void __user *)addr;
        arm64_notify_die("", regs, &info, esr);
 
-       return ret;
+       return 0;
 }
 
 static const struct fault_info fault_info[] = {
index 5960bef0170df85916d0c1ac3b65f570f0af67ea..00e7b900ca4193e83dfa7de7dd506984afe90bce 100644 (file)
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
 
        reserve_elfcorehdr();
 
+       high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
        dma_contiguous_reserve(arm64_dma_phys_limit);
 
        memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
        sparse_init();
        zone_sizes_init(min, max);
 
-       high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
        memblock_dump_all();
 }
 
index 9345b44b86f036572e33721eb80e9bbbe4493aa4..f57118e1f6b4265257799ae2cf8ea356077e20b9 100644 (file)
@@ -123,8 +123,8 @@ int puts(const char *s)
        while ((nuline = strchr(s, '\n')) != NULL) {
                if (nuline != s)
                        pdc_iodc_print(s, nuline - s);
-                       pdc_iodc_print("\r\n", 2);
-                       s = nuline + 1;
+               pdc_iodc_print("\r\n", 2);
+               s = nuline + 1;
        }
        if (*s != '\0')
                pdc_iodc_print(s, strlen(s));
index c980a02a52bc0dda0a23b205f59d1d86438553f2..598c8d60fa5e602cc9303e1986ada9680d64feb3 100644 (file)
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER      2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER      3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
index a4fd296c958e8e14f13a913aca50510b11eb49b7..f3cecf5117cf8ab14724f0ea3535220c3224d569 100644 (file)
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
        STREG   %r19,PT_SR7(%r16)
 
 intr_return:
-       /* NOTE: Need to enable interrupts incase we schedule. */
-       ssm     PSW_SM_I, %r0
-
        /* check for reschedule */
        mfctl   %cr30,%r1
        LDREG   TI_FLAGS(%r1),%r19      /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
        LDREG   PT_IASQ1(%r16), %r20
        cmpib,COND(=),n 0,%r20,intr_restore /* backward */
 
+       /* NOTE: We need to enable interrupts if we have to deliver
+        * signals. We used to do this earlier but it caused kernel
+        * stack overflows. */
+       ssm     PSW_SM_I, %r0
+
        copy    %r0, %r25                       /* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29                  /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
        cmpib,COND(=)   0, %r20, intr_do_preempt
        nop
 
+       /* NOTE: We need to enable interrupts if we schedule.  We used
+        * to do this earlier but it caused kernel stack overflows. */
+       ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29          /* Reference param save area */
 #endif
index e3a8e5e4d5de75897adcea4134f87c7246f60646..8d072c44f300c16d45ba8f4ee0c2eee6435e4ddd 100644 (file)
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
 
        __INITRODATA
+       .align 4
        .export os_hpmc_size
 os_hpmc_size:
        .word .os_hpmc_end-.os_hpmc
index 5a657986ebbf4bef7beff4e8c8d20f1343872347..143f90e2f9f3c631616d4af52f0fe3fa08f44af9 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
index 7eab4bb8abe630b14c54c3b457285b4228607dc6..66e506520505d8a3245d49d492831df5e3bbb42a 100644 (file)
@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>
 
-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */
 
index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a..e2a2b8400490049143edee40316313a906ca6db7 100644 (file)
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
index 5acb5a176dbe5c8bffe6ddb7458b7d3ac2b7019f..72be0c32e902a35fa45e5ed02036df91999dda58 100644 (file)
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
        printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
               regs->nip, regs->link, regs->ctr);
-       printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+       printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
               regs, regs->trap, print_tainted(), init_utsname()->release);
        printk("MSR:  "REG" ", regs->msr);
        print_msr_bits(regs->msr);
index bf457843e03217b9aa02815d7791f0fce72aea2b..0d750d274c4e21a3324eb3505bbd73c86a58cdc9 100644 (file)
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
 
        /* Return the per-cpu state for state saving/migration */
        return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+              (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }
 
 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        /*
         * Restore P and Q. If the interrupt was pending, we
-        * force both P and Q, which will trigger a resend.
+        * force Q and !P, which will trigger a resend.
         *
         * That means that a guest that had both an interrupt
         * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
         * is perfectly fine as coalescing interrupts that haven't
         * been presented yet is always allowed.
         */
-       if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+       if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
                state->old_p = true;
        if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
                state->old_q = true;
index 46d74e81aff1b4caad4769e7686fa0a800695cd4..d183b4801bdbded832b90d2aa1a18e713f70695b 100644 (file)
@@ -763,7 +763,8 @@ emit_clear:
                        func = (u8 *) __bpf_call_base + imm;
 
                        /* Save skb pointer if we need to re-cache skb data */
-                       if (bpf_helper_changes_pkt_data(func))
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func))
                                PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
                        PPC_MR(b2p[BPF_REG_0], 3);
 
                        /* refresh skb cache */
-                       if (bpf_helper_changes_pkt_data(func)) {
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func)) {
                                /* reload skb pointer to r3 */
                                PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
                                bpf_jit_emit_skb_loads(image, ctx);
index 1538129663658381b6b1a425dcbf582b1ed09531..fce545774d50afc6093c28ad2f4127c24ed5331c 100644 (file)
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
        int ret;
        __u64 target;
 
-       if (is_kernel_addr(addr))
-               return branch_target((unsigned int *)addr);
+       if (is_kernel_addr(addr)) {
+               if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+                       return 0;
+
+               return branch_target(&instr);
+       }
 
        /* Userspace: need copy instruction here then translate it */
        pagefault_disable();
index 0ead3cd73caa2f8816e8c04f47cca691efba0560..be4e7f84f70a59db60e92a9bfe845678f71cc608 100644 (file)
@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
        if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
                return 0;
 
+       /*
+        * Check whether nest_imc is registered. We could end up here if the
+        * cpuhotplug callback registration fails, i.e. the callback invokes the
+        * offline path for all successfully registered nodes. At this stage,
+        * the nest_imc PMU will not be registered and we should return here.
+        *
+        * We return zero since this is not an offline failure, and
+        * cpuhp_setup_state() returns the actual failure reason to the caller,
+        * which in turn will call the cleanup routine.
+        */
+       if (!nest_pmus)
+               return 0;
+
        /*
         * Now that this cpu is one of the designated,
         * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                if (nest_pmus == 1) {
                        cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
                        kfree(nest_imc_refc);
+                       kfree(per_nest_pmu_arr);
                }
 
                if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
        kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
        kfree(pmu_ptr);
-       kfree(per_nest_pmu_arr);
        return;
 }
 
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
                        ret = nest_pmu_cpumask_init();
                        if (ret) {
                                mutex_unlock(&nest_init_lock);
+                               kfree(nest_imc_refc);
+                               kfree(per_nest_pmu_arr);
                                goto err_free;
                        }
                }
index 773c4e039cd7288bcd25ed53ce831db84c766f26..c0319cbf1eec58d7ea8960259838b865c23f49a1 100644 (file)
 #define smp_rmb()      RISCV_FENCE(r,r)
 #define smp_wmb()      RISCV_FENCE(w,w)
 
+/*
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V.  The sequence looks like:
+ *
+ *    lr.aq lock
+ *    sc    lock <= LOCKED
+ *    smp_mb__after_spinlock()
+ *    // critical section
+ *    lr    lock
+ *    sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides an RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock.  Thus, we're just doing a full fence.
+ */
+#define smp_mb__after_spinlock()       RISCV_FENCE(rw,rw)
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
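
As a usage sketch for the new barrier (illustrative only; the real call sites
are in the scheduler, and the lock name here is hypothetical),
smp_mb__after_spinlock() goes immediately after the lock acquisition whose
critical section needs full RCsc ordering:

	static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock */

	static void example_critical_section(void)
	{
		spin_lock(&example_lock);
		/*
		 * Upgrade the acquire ordering of the lock to the full
		 * ordering the scheduler relies on; on RISC-V this expands
		 * to RISCV_FENCE(rw,rw).
		 */
		smp_mb__after_spinlock();

		/* ... critical section ... */

		spin_unlock(&example_lock);
	}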
index 8fbb6749910d42473d814b37eb255f812d4d206b..cb7b0c63014ecbc61c8d9a2b8263a65dd1775d11 100644 (file)
 #include <asm/tlbflush.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_HVC_RISCV_SBI
-#include <asm/hvc_riscv_sbi.h>
-#endif
-
 #ifdef CONFIG_DUMMY_CONSOLE
 struct screen_info screen_info = {
        .orig_video_lines       = 30,
@@ -212,13 +208,6 @@ static void __init setup_bootmem(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-#if defined(CONFIG_HVC_RISCV_SBI)
-       if (likely(early_console == NULL)) {
-               early_console = &riscv_sbi_early_console_dev;
-               register_console(early_console);
-       }
-#endif
-
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
        strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
index a2ae936a093e4f91d4def96fff0dd46e6e0226c0..79c78668258ede202086c072c63352ca14585903 100644 (file)
@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
        bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
 
        /* Check the reserved flags. */
-       if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+       if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
                return -EINVAL;
 
        flush_icache_mm(mm, local);
index 57d7bc92e0b8a766d24520ea5234fca56971b646..0a6b0286c32e9e0a7283d9cfb66dc357fe2e36fa 100644 (file)
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
        return pud;
 }
 
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-       return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
 static inline pud_t pud_mkclean(pud_t pud)
 {
        if (pud_large(pud)) {
index f04db3779b34507f9dd38791fc89131505d5f0c3..59eea9c65d3e9e8595d509001b1c794420060887 100644 (file)
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
                return retval;
        }
 
+       groups_sort(group_info);
        retval = set_current_groups(group_info);
        put_group_info(group_info);
 
index e81c16838b90f1bc9a5418bc1b4e5365e9cb0aef..9557d8b516df5a689dda995cd7fd501ddd6cf54c 100644 (file)
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL   8       /* code uses literals */
 #define SEEN_FUNC      16      /* calls C functions */
 #define SEEN_TAIL_CALL 32      /* code uses tail calls */
-#define SEEN_SKB_CHANGE        64      /* code changes skb data */
-#define SEEN_REG_AX    128     /* code uses constant blinding */
+#define SEEN_REG_AX    64      /* code uses constant blinding */
 #define SEEN_STACK     (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                        EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
-       if (jit->seen & SEEN_SKB)
+       if (jit->seen & SEEN_SKB) {
                emit_load_skb_data_hlen(jit);
-       if (jit->seen & SEEN_SKB_CHANGE)
                /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
                EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
                              STK_OFF_SKBP);
+       }
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                EMIT2(0x0d00, REG_14, REG_W1);
                /* lgr %b0,%r2: load return value into %b0 */
                EMIT4(0xb9040000, BPF_REG_0, REG_2);
-               if (bpf_helper_changes_pkt_data((void *)func)) {
-                       jit->seen |= SEEN_SKB_CHANGE;
+               if ((jit->seen & SEEN_SKB) &&
+                   bpf_helper_changes_pkt_data((void *)func)) {
                        /* lg %b1,ST_OFF_SKBP(%r15) */
                        EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
                                      REG_15, STK_OFF_SKBP);
index be3136f142a9993e0c6c8cfa1d651b1685654a73..a8103a84b4ac4a2ec84c44c302862b3aed8b7e7f 100644 (file)
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
index 815c03d7a765524424b92866b1567ea2a43695d4..41363f46797bf9f74dd922fadbd2a3f190e8c9bb 100644 (file)
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
index 33c0f8bb0f33de0c6beadd3dd8a9bef6253bcb10..5335ba3c850ed3acdc074ffe639d3ddac101f2ad 100644 (file)
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
        if (!(pmd_val(pmd) & _PAGE_VALID))
                return 0;
 
-       if (!pmd_access_permitted(pmd, write))
+       if (write && !pmd_write(pmd))
                return 0;
 
        refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
        if (!(pud_val(pud) & _PAGE_VALID))
                return 0;
 
-       if (!pud_access_permitted(pud, write))
+       if (write && !pud_write(pud))
                return 0;
 
        refs = 0;
index 5765e7e711f78248d2bff70f9c57ca48a4514355..ff5f9cb3039af1f91c8701915f08c051c21d0d81 100644 (file)
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                u8 *func = ((u8 *)__bpf_call_base) + imm;
 
                ctx->saw_call = true;
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
                emit_call((u32 *)func, ctx);
                emit_nop(ctx);
 
                emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
 
-               if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-                       load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       load_skb_regs(ctx, L7);
                break;
        }
 
index 50a32c33d729ba2a570eadaf77cff69925218c42..73c57f614c9e0600a5b4df7b28a7fa55f4abe471 100644 (file)
@@ -1,4 +1,5 @@
 generic-y += barrier.h
+generic-y += bpf_perf_event.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += current.h
index b668e351fd6c2e4f7a4b75c8a67eada77449abc9..fca34b2177e28a055663055d01c4fb7d78420285 100644 (file)
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        uml_setup_stubs(mm);
+       return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
index 4e6fcb32620ffb2125f648622499e5bf7c950e72..428644175956231aad112a0ce221452913736635 100644 (file)
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
                (void *)UPT_IP(regs), (void *)UPT_SP(regs),
index 59b06b48f27d7a4e0d8b82fc147d3fdad7f75295..5c205a9cb5a6a4bb2c865255bc946d7ca4882db1 100644 (file)
@@ -81,9 +81,10 @@ do { \
        } \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
index 8eed3f94bfc774de5e3f344590f8889a999dea9c..d4fc98c50378c40bc901f6446d2bfff68151eb6a 100644 (file)
@@ -926,7 +926,8 @@ config MAXSMP
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
        range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+       range 2 64 if SMP && X86_32 && X86_BIGSMP
+       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
        range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
        default "1" if !SMP
        default "8192" if MAXSMP
index 6293a8768a9123038eeced9e8dc2c4874feed275..672441c008c73ae3d949b974b128ef167d7d486a 100644 (file)
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
 config UNWINDER_GUESS
        bool "Guess unwinder"
        depends on EXPERT
+       depends on !STACKDEPOT
        ---help---
          This option enables the "guess" unwinder for unwinding kernel stack
          traces.  It scans the stack and reports every kernel text address it
index 1e9c322e973af0e1024dc3751b99d16eb76574f7..f25e1530e0644c83d8c69b1a90436c54ce823ae4 100644 (file)
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
 ifdef CONFIG_X86_64
        vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
        vmlinux-objs-y += $(obj)/mem_encrypt.o
+       vmlinux-objs-y += $(obj)/pgtable_64.o
 endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
index 20919b4f31330fbc36528e47cf7dd010e516c31c..fc313e29fe2c4be637ff7d41bfafdc7a29144ba4 100644 (file)
@@ -305,10 +305,18 @@ ENTRY(startup_64)
        leaq    boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
-       /* Check if 5-level paging has already enabled */
-       movq    %cr4, %rax
-       testl   $X86_CR4_LA57, %eax
-       jnz     lvl5
+       /*
+        * Check if we need to enable 5-level paging.
+        * RSI holds real mode data and needs to be preserved across
+        * a function call.
+        */
+       pushq   %rsi
+       call    l5_paging_required
+       popq    %rsi
+
+       /* If l5_paging_required() returned zero, we're done here. */
+       cmpq    $0, %rax
+       je      lvl5
 
        /*
         * At this point we are in long mode with 4-level paging enabled,
index b50c42455e25257bff89dd2d9d5f23534340076e..98761a1576ceb5c21b2d8c7e98c1217fd48abb26 100644 (file)
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
        }
 }
 
+static bool l5_supported(void)
+{
+       /* Check if leaf 7 is supported. */
+       if (native_cpuid_eax(0) < 7)
+               return 0;
+
+       /* Check if la57 is supported. */
+       return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
                               unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
        console_init();
        debug_putstr("early console in extract_kernel\n");
 
+       if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+               error("This linux kernel as configured requires 5-level paging\n"
+                       "This CPU does not support the required 'cr4.la57' feature\n"
+                       "Unable to boot - please use a kernel appropriate for your CPU\n");
+       }
+
        free_mem_ptr     = heap;        /* Heap */
        free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644 (file)
index 0000000..b4469a3
--- /dev/null
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+       /* Check if leaf 7 is supported. */
+
+       if (native_cpuid_eax(0) < 7)
+               return 0;
+
+       /* Check if la57 is supported. */
+       if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+               return 0;
+
+       /* Check if 5-level paging has already been enabled. */
+       if (native_read_cr4() & X86_CR4_LA57)
+               return 0;
+
+       return 1;
+}
index 49f4970f693b3bddacad0b2965acad0ae112cb03..c9e8499fbfe75c0a98d0223247fa6cb0746198bb 100644 (file)
@@ -44,9 +44,9 @@ FDINITRD=$6
 
 # Make sure the files actually exist
 verify "$FBZIMAGE"
-verify "$MTOOLSRC"
 
 genbzdisk() {
+       verify "$MTOOLSRC"
        mformat a:
        syslinux $FIMAGE
        echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
 }
 
 genfdimage144() {
+       verify "$MTOOLSRC"
        dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
        mformat v:
        syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
 }
 
 genfdimage288() {
+       verify "$MTOOLSRC"
        dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
        mformat w:
        syslinux $FIMAGE
index 399a29d067d6367603714633fb8c4de6ab77275a..cb91a64a99e7cdbc0422227383611378fb6b076a 100644 (file)
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
        salsa20_ivsetup(ctx, walk.iv);
 
-       if (likely(walk.nbytes == nbytes))
-       {
-               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-                                     walk.dst.virt.addr, nbytes);
-               return blkcipher_walk_done(desc, &walk, 0);
-       }
-
        while (walk.nbytes >= 64) {
                salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
                                      walk.dst.virt.addr,
index 4838037f97f6edffda62b5b045c837fcc29402f0..ace8f321a5a1f2d1331cc4331a1922c9ed3d8bc1 100644 (file)
@@ -941,9 +941,10 @@ ENTRY(debug)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Ldebug_from_sysenter_stack
 
        TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Lnmi_from_sysenter_stack
 
        /* Not on SYSENTER stack. */
index f81d50d7ceacdefa06d61482687937096c68421c..3d19c830e1b1ab3c7e3115014039a35eb9607214 100644 (file)
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+       .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH    CPU_ENTRY_AREA_entry_stack + \
+                       SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+       swapgs
+
+       /* Stash the user RSP. */
+       movq    %rsp, RSP_SCRATCH
+
+       /* Load the top of the task stack into RSP */
+       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+       /* Start building the simulated IRET frame. */
+       pushq   $__USER_DS                      /* pt_regs->ss */
+       pushq   RSP_SCRATCH                     /* pt_regs->sp */
+       pushq   %r11                            /* pt_regs->flags */
+       pushq   $__USER_CS                      /* pt_regs->cs */
+       pushq   %rcx                            /* pt_regs->ip */
+
+       /*
+        * x86 lacks a near absolute jump, and we can't jump to the real
+        * entry text with a relative jump.  We could push the target
+        * address and then use retq, but this destroys the pipeline on
+        * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+        * spill RDI and restore it in a second-stage trampoline.
+        */
+       pushq   %rdi
+       movq    $entry_SYSCALL_64_stage2, %rdi
+       jmp     *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+       .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+       UNWIND_HINT_EMPTY
+       popq    %rdi
+       jmp     entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
        UNWIND_HINT_EMPTY
        /*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
        popq    %rsi    /* skip rcx */
        popq    %rdx
        popq    %rsi
+
+       /*
+        * Now all regs are restored except RSP and RDI.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
        popq    %rdi
-       movq    RSP-ORIG_RAX(%rsp), %rsp
+       popq    %rsp
        USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -466,12 +540,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-       pushfq
-       testl $X86_EFLAGS_IF, (%rsp)
+       pushq %rax
+       SAVE_FLAGS(CLBR_RAX)
+       testl $X86_EFLAGS_IF, %eax
        jz .Lokay_\@
        ud2
 .Lokay_\@:
-       addq $8, %rsp
+       popq %rax
 #endif
 .endm
 
@@ -563,6 +638,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
        cld
+
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jz      1f
+       SWAPGS
+       call    switch_to_thread_stack
+1:
+
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
@@ -572,12 +654,8 @@ END(irq_entries_start)
        jz      1f
 
        /*
-        * IRQ from user mode.  Switch to kernel gsbase and inform context
-        * tracking that we're in kernel mode.
-        */
-       SWAPGS
-
-       /*
+        * IRQ from user mode.
+        *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
         * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       SWAPGS
        POP_EXTRA_REGS
-       POP_C_REGS
-       addq    $8, %rsp        /* skip regs->orig_ax */
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rax
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+
+       /*
+        * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+       pushq   5*8(%rdi)       /* RSP */
+       pushq   4*8(%rdi)       /* EFLAGS */
+       pushq   3*8(%rdi)       /* CS */
+       pushq   2*8(%rdi)       /* RIP */
+
+       /* Push user RDI on the trampoline stack. */
+       pushq   (%rdi)
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
+       /* Restore RDI. */
+       popq    %rdi
+       SWAPGS
        INTERRUPT_RETURN
 
 
@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+       UNWIND_HINT_FUNC
+
+       pushq   %rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
+       ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +983,12 @@ ENTRY(\sym)
 
        ALLOC_PT_GPREGS_ON_STACK
 
-       .if \paranoid
-       .if \paranoid == 1
+       .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
-       jnz     1f
+       jnz     .Lfrom_usermode_switch_stack_\@
        .endif
+
+       .if \paranoid
        call    paranoid_entry
        .else
        call    error_entry
@@ -894,20 +1030,15 @@ ENTRY(\sym)
        jmp     error_exit
        .endif
 
-       .if \paranoid == 1
+       .if \paranoid < 2
        /*
-        * Paranoid entry from userspace.  Switch stacks and treat it
+        * Entry from userspace.  Switch stacks and treat it
         * as a normal entry.  This means that paranoid handlers
         * run in real process context if user_mode(regs).
         */
-1:
+.Lfrom_usermode_switch_stack_\@:
        call    error_entry
 
-
-       movq    %rsp, %rdi                      /* pt_regs pointer */
-       call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-
        movq    %rsp, %rdi                      /* pt_regs pointer */
 
        .if \has_error_code
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
        SWAPGS
 
 .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %12 */
+       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    sync_regs
+       movq    %rax, %rsp                      /* switch stack */
+       ENCODE_FRAME_POINTER
+       pushq   %r12
+
        /*
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
index 568e130d932cd2a7d44393e5fc52408cffe64f34..95ad40eb7effbdb6f605285df62d1e0bd33a6cac 100644 (file)
@@ -48,7 +48,7 @@
  */
 ENTRY(entry_SYSENTER_compat)
        /* Interrupts are off on entry. */
-       SWAPGS_UNSAFE_STACK
+       SWAPGS
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /*
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
-       /* Construct struct pt_regs on stack (iret frame is already on stack) */
        pushq   %rax                    /* pt_regs->orig_ax */
+
+       /* switch to thread stack expects orig_ax to be pushed */
+       call    switch_to_thread_stack
+
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..1faf40f2dda9a862f974d4f06616402875e32ffc 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
        WARN_ON_ONCE(address != regs->ip);
 
+       /* This should be unreachable in NATIVE mode. */
+       if (WARN_ON(vsyscall_mode == NATIVE))
+               return false;
+
        if (vsyscall_mode == NONE) {
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
        return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+static void __init set_vsyscall_pgtable_user_bits(void)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_k(VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+       p4d->p4d |= _PAGE_USER;
+#endif
+       pud = pud_offset(p4d, VSYSCALL_ADDR);
+       set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+       pmd = pmd_offset(pud, VSYSCALL_ADDR);
+       set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE)
+       if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
+               set_vsyscall_pgtable_user_bits();
+       }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644 (file)
index 0000000..2fbc69a
--- /dev/null
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+
+       /*
+        * The GDT is just below entry_stack and thus serves (on x86_64) as
+        * a read-only guard page.
+        */
+       struct entry_stack_page entry_stack_page;
+
+       /*
+        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+        * we need task switches to work, and task switches write to the TSS.
+        */
+       struct tss_struct tss;
+
+       char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+       /*
+        * Exception stacks used for IST entries.
+        *
+        * In the future, this should have a separate slot for each stack
+        * with guard pages between them.
+        */
+       char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE    (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE        (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define        CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU         (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR    ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE                        \
+       (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+       return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
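
A brief usage sketch for the new header (illustrative only; the helper below is
hypothetical, while get_cpu_entry_area() and cpu_entry_stack() are the
interfaces declared above): per-CPU entry structures are looked up by CPU
number, e.g. to find the top of a CPU's entry stack:

	#include <asm/cpu_entry_area.h>

	static unsigned long example_entry_stack_top(int cpu)
	{
		struct entry_stack *stack = cpu_entry_stack(cpu);

		/* The stack grows down, so its top is the end of the array. */
		return (unsigned long)(stack + 1);
	}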
index bf6a76202a779ee131b4df8c89449ab52abd0a79..ea9a7dde62e5c4d551ba89e429f911fb5c6603fd 100644 (file)
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
        set_bit(bit, (unsigned long *)cpu_caps_set);    \
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
index 4011cb03ef08e52db15f52779ce366c26359a34b..ec8be07c0cda5c9b240d351ca583409713c58406 100644 (file)
@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -60,17 +61,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
        return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-       return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-       unsigned int idx = get_cpu_gdt_ro_index(cpu);
-       return (struct desc_struct *)__fix_to_virt(idx);
+       return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +179,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
        struct desc_struct *d = get_cpu_gdt_rw(cpu);
        tss_desc tss;
index 0211029076ea8b9ed6648b9bf298c99c8b2124ad..6777480d8a427eaaa07559f77985c125aa66bb6c 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 #include <asm/percpu.h>
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 #endif /* _ASM_X86_ESPFIX_H */
index b0c505fe9a958c701fef6d96f281bb8ab1a773de..64c4a30e0d39621ff8587fc8da538cd3d1d9f144 100644 (file)
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
                         PAGE_SIZE)
 #endif
 
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
        FIX_IO_APIC_BASE_0,
        FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
-       FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
        FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
        FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
 #endif
-       /* Fixmap entries to remap the GDTs, one per processor. */
-       FIX_GDT_REMAP_BEGIN,
-       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 #ifdef CONFIG_ACPI_APEI_GHES
        /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 
 #define FIXADDR_SIZE   (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START          (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
 extern int fixmaps_set;
 
index 1b0a5abcd8aeb6e700013c5434aaeb0bba7a152f..96aa6b9884dc5b3bc8d54c9ef1c6258eea13a0d0 100644 (file)
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types  */
 enum x86_hypervisor_type {
        X86_HYPER_NATIVE = 0,
        X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
        X86_HYPER_KVM,
 };
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
        /* Hypervisor name */
        const char      *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return x86_hyper_type == type;
+}
 #else
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644 (file)
index 0000000..989cfa8
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+                            unsigned long type)
+{
+       struct { u64 d[2]; } desc = { { pcid, addr } };
+
+       /*
+        * The memory clobber is because the whole point is to invalidate
+        * stale TLB entries and, especially if we're flushing global
+        * mappings, we don't want the compiler to reorder any subsequent
+        * memory accesses before the TLB flush.
+        *
+        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+        * invpcid (%rcx), %rax in long mode.
+        */
+       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR                0
+#define INVPCID_TYPE_SINGLE_CTXT       1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
+#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+                                    unsigned long addr)
+{
+       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
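
All of these wrappers execute the INVPCID instruction unconditionally, so callers must gate them on X86_FEATURE_INVPCID. A hedged sketch of the intended pattern (the real consumer is __native_flush_tlb_global() in tlbflush.h further down in this diff; the function name below is illustrative only):

#include <asm/cpufeature.h>
#include <asm/invpcid.h>

/* Flush everything, preferring INVPCID when the CPU supports it. */
static void example_flush_everything(void)
{
	if (static_cpu_has(X86_FEATURE_INVPCID)) {
		/* A single instruction flushes all mappings, globals included. */
		invpcid_flush_all();
		return;
	}

	/*
	 * Otherwise fall back to the CR4.PGE toggle performed by
	 * __native_flush_tlb_global().
	 */
}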
index c8ef23f2c28f17c59308b9c41179c47f85e075ad..89f08955fff733c688a5ce4f4a0b8d74050ee617 100644 (file)
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
        swapgs;                                 \
        sysretl
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)          pushfq; popq %rax
+#endif
 #else
 #define INTERRUPT_RETURN               iret
 #define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
index f86a8caa561e8873c3f34e6e8b8cd509ebadd819..395c9631e000a3a17aa574c1b25fcc2cafd5b5fb 100644 (file)
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
index 9ea26f16749706fddd5b15e8bf557a9e6156e165..5ff3e8af2c2056b7fe19560ee2ba1ad7146aaf2a 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
        atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
+       struct rw_semaphore     ldt_usr_sem;
+       struct ldt_struct       *ldt;
 #endif
 
 #ifdef CONFIG_X86_64
index 6d16d15d09a0daed96a1e3d670b6203d1779b98e..5ede7cae1d673e38effa7ce9d1cc5aaf4481ac46 100644 (file)
@@ -57,11 +57,17 @@ struct ldt_struct {
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+       mm->context.ldt = NULL;
+       init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-                                      struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+                                 struct mm_struct *mm)
 {
        return 0;
 }
@@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       mutex_init(&mm->context.lock);
+
        mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
        atomic64_set(&mm->context.tlb_gen, 0);
 
-       #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
                /* pkey 0 is the default and always allocated */
                mm->context.pkey_allocation_map = 0x1;
                /* -1 means unallocated or invalid */
                mm->context.execute_only_pkey = -1;
        }
-       #endif
-       return init_new_context_ldt(tsk, mm);
+#endif
+       init_new_context_ldt(mm);
+       return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
@@ -176,10 +185,10 @@ do {                                              \
 } while (0)
 #endif
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        paravirt_arch_dup_mmap(oldmm, mm);
+       return ldt_dup_context(oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
@@ -281,33 +290,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-       if (static_cpu_has(X86_FEATURE_PCID)) {
-               VM_WARN_ON_ONCE(asid > 4094);
-               return __sme_pa(mm->pgd) | (asid + 1);
-       } else {
-               VM_WARN_ON_ONCE(asid != 0);
-               return __sme_pa(mm->pgd);
-       }
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-       VM_WARN_ON_ONCE(asid > 4094);
-       return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
@@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
        /* For now, be very restrictive about when this can be called. */
index 283efcaac8aff86f2c004bc23e4b8642cbf3d527..892df375b6155a51f584760efb9f9e77c3f732e8 100644 (file)
@@ -927,6 +927,15 @@ extern void default_banner(void);
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+       PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
index f2ca9b28fd68303f4494775564aa9da77ddcd53a..ce245b0cdfcaa42bd932a387bbb189ee7349bfef 100644 (file)
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))     \
-                   & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES   (NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE                            \
+       ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE             \
+       ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END   (PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END   (FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END   (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR  VMALLOC_START
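
CPU_ENTRY_AREA_PAGES is an open-coded reservation (40 pages per CPU) rather than a value derived from struct cpu_entry_area, because including <asm/cpu_entry_area.h> here would recurse. The comment above points at the compile-time check that keeps the two in sync; a sketch of that check, assuming it sits in an __init path in pgtable_32.c (the wrapper name is hypothetical):

static void __init check_cpu_entry_area_reservation(void)
{
	/*
	 * CPU_ENTRY_AREA_MAP_SIZE comes from <asm/cpu_entry_area.h>; the
	 * page count reserved above must be at least that large.
	 */
	BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
}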
index 6d5f45dcd4a13caafbf184f323d0725c2d5f53e4..3d27831bc58dfac15e91a33d27083c0988851de2 100644 (file)
@@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_MASK     (~(PGDIR_SIZE - 1))
 
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM         _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM                 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
 #ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(16384, UL)
+# define __VMALLOC_BASE                _AC(0xff92000000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffd4000000000000, UL)
 #else
-#define VMALLOC_SIZE_TB        _AC(32, UL)
-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(32, UL)
+# define __VMALLOC_BASE                _AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffffea0000000000, UL)
 #endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START  vmalloc_base
-#define VMEMMAP_START  vmemmap_base
+# define VMALLOC_START         vmalloc_base
+# define VMEMMAP_START         vmemmap_base
 #else
-#define VMALLOC_START  __VMALLOC_BASE
-#define VMEMMAP_START  __VMEMMAP_BASE
+# define VMALLOC_START         __VMALLOC_BASE
+# define VMEMMAP_START         __VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END    (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END            (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR          (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections ends with the start of the fixmap */
-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START    ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END      (-68 * (_AC(1, UL) << 30))
+#define MODULES_END            __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN            (MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY       _AC(-2, UL)
+#define ESPFIX_BASE_ADDR       (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD     _AC(-3, UL)
+#define CPU_ENTRY_AREA_BASE    (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START           ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END             (-68 * (_AC(1, UL) << 30))
 
 #define EARLY_DYNAMIC_PAGE_TABLES      64
 
index cc16fa882e3e760a40351cf3e7476ac9f25ffe00..cad8dab266bceefcd91a830371716d48679c7cc7 100644 (file)
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86      boot_cpu_data;
 extern struct cpuinfo_x86      new_cpu_data;
 
-extern struct tss_struct       doublefault_tss;
-extern __u32                   cpu_caps_cleared[NCAPINTS];
-extern __u32                   cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss       doublefault_tss;
+extern __u32                   cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32                   cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
        write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
        u32                     reserved1;
        u64                     sp0;
+
+       /*
+        * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+        * Linux does not use ring 1, so sp1 is not otherwise needed.
+        */
        u64                     sp1;
+
        u64                     sp2;
        u64                     reserved2;
        u64                     ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS                 65536
 #define IO_BITMAP_BYTES                        (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS                        (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET               offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET               (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET       0x8000
 
+struct entry_stack {
+       unsigned long           words[64];
+};
+
+struct entry_stack_page {
+       struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
        /*
-        * The hardware state:
+        * The fixed hardware portion.  This must not cross a page boundary
+        * lest it violate the SDM's advice and potentially trigger
+        * errata.
         */
        struct x86_hw_tss       x86_tss;
 
@@ -339,18 +360,9 @@ struct tss_struct {
         * be within the limit.
         */
        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-       /*
-        * Space for the temporary SYSENTER stack.
-        */
-       unsigned long           SYSENTER_stack_canary;
-       unsigned long           SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-       /* sp0 on x86_32 is special in and around vm86 mode. */
+       /*
+        *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
+        *  and around vm86 mode and sp0 on x86_64 is special because of the
+        *  entry trampoline.
+        */
        return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
index 8da111b3c342bbb61a9e630e101c8a83422a15ea..f737068787729f045a578776845231b0a0ee3e0d 100644 (file)
@@ -16,6 +16,7 @@ enum stack_type {
        STACK_TYPE_TASK,
        STACK_TYPE_IRQ,
        STACK_TYPE_SOFTIRQ,
+       STACK_TYPE_ENTRY,
        STACK_TYPE_EXCEPTION,
        STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
 };
@@ -28,6 +29,8 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info);
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info, unsigned long *visit_mask);
 
index 982c325dad3377a4f0d80a5808f4d731a87926a5..8be6afb584715dc8d5a50d1bbd989bf053366294 100644 (file)
 
 /* image of the saved processor state */
 struct saved_context {
-       u16 es, fs, gs, ss;
+       /*
+        * On x86_32, all segment registers, with the possible exception of
+        * gs, are saved at kernel entry in pt_regs.
+        */
+#ifdef CONFIG_X86_32_LAZY_GS
+       u16 gs;
+#endif
        unsigned long cr0, cr2, cr3, cr4;
        u64 misc_enable;
        bool misc_enable_saved;
index 7306e911faee20694908bdf482166546776a7e92..a7af9f53c0cb773d05fd84ebe94525a1971e3ebb 100644 (file)
  */
 struct saved_context {
        struct pt_regs regs;
-       u16 ds, es, fs, gs, ss;
-       unsigned long gs_base, gs_kernel_base, fs_base;
+
+       /*
+        * User CS and SS are saved in current_pt_regs().  The rest of the
+        * segment selectors need to be saved and restored here.
+        */
+       u16 ds, es, fs, gs;
+
+       /*
+        * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+        * so we save them separately.  We save the kernelmode GSBASE to
+        * restore percpu access after resume.
+        */
+       unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
        unsigned long cr0, cr2, cr3, cr4, cr8;
        u64 misc_enable;
        bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
        u16 gdt_pad; /* Unused */
        struct desc_ptr gdt_desc;
        u16 idt_pad;
-       u16 idt_limit;
-       unsigned long idt_base;
+       struct desc_ptr idt;
        u16 ldt;
        u16 tss;
        unsigned long tr;
index 8c6bd6863db9d6b737cd0649324c154f9b9798a3..9b6df68d8fd1eba26f3651faa5c8b8f4dcf223f1 100644 (file)
@@ -79,10 +79,10 @@ do {                                                                        \
 static inline void refresh_sysenter_cs(struct thread_struct *thread)
 {
        /* Only happens when SEP is enabled, no need to test "SEP"arately: */
-       if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+       if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
                return;
 
-       this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+       this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
        wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 }
 #endif
@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
+       /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
 #ifdef CONFIG_X86_32
        load_sp0(task->thread.sp0);
 #else
-       load_sp0(task_top_of_stack(task));
+       if (static_cpu_has(X86_FEATURE_XENPV))
+               load_sp0(task_top_of_stack(task));
 #endif
 }
 
index 70f425947dc50f3e99ca639c0ead0d7e1cce636d..00223333821a96616647a9cbb6fe729c4a18b7b6 100644 (file)
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
 #endif
 
 #endif
index 877b5c1a1b1247116e20e7272dbade77e1874fc4..e1884cf35257b8133ca97f50d146ae3ebfcaa30f 100644 (file)
@@ -9,70 +9,66 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invpcid.h>
 
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
-                            unsigned long type)
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 {
-       struct { u64 d[2]; } desc = { { pcid, addr } };
-
        /*
-        * The memory clobber is because the whole point is to invalidate
-        * stale TLB entries and, especially if we're flushing global
-        * mappings, we don't want the compiler to reorder any subsequent
-        * memory accesses before the TLB flush.
-        *
-        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-        * invpcid (%rcx), %rax in long mode.
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
         */
-       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+       return atomic64_inc_return(&mm->context.tlb_gen);
 }
 
-#define INVPCID_TYPE_INDIV_ADDR                0
-#define INVPCID_TYPE_SINGLE_CTXT       1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
-#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+/* There are 12 bits of space for ASIDS in CR3 */
+#define CR3_HW_ASID_BITS               12
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#define PTI_CONSUMED_ASID_BITS         0
 
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
-                                    unsigned long addr)
-{
-       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
+/*
+ * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid.  -1 below to account
+ * for them being zero-based.  Another -1 is because ASID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
 
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+static inline u16 kern_pcid(u16 asid)
 {
-       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       /*
+        * If PCID is on, ASID-aware code paths put the ASID+1 into the
+        * PCID bits.  This serves two purposes.  It prevents a nasty
+        * situation in which PCID-unaware code saves CR3, loads some other
+        * value (with PCID == 0), and then restores CR3, thus corrupting
+        * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+        * that any bugs involving loading a PCID-enabled CR3 with
+        * CR4.PCIDE off will trigger deterministically.
+        */
+       return asid + 1;
 }
 
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+               return __sme_pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+               return __sme_pa(pgd);
+       }
 }
 
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
-}
-
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-{
-       u64 new_tlb_gen;
-
-       /*
-        * Bump the generation count.  This also serves as a full barrier
-        * that synchronizes with switch_mm(): callers are required to order
-        * their read of mm_cpumask after their writes to the paging
-        * structures.
-        */
-       smp_mb__before_atomic();
-       new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-       smp_mb__after_atomic();
-
-       return new_tlb_gen;
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
 
 #ifdef CONFIG_PARAVIRT
@@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
+/*
+ * flush the entire current user mapping
+ */
 static inline void __native_flush_tlb(void)
 {
        /*
@@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void)
        preempt_enable();
 }
 
-static inline void __native_flush_tlb_global_irq_disabled(void)
-{
-       unsigned long cr4;
-
-       cr4 = this_cpu_read(cpu_tlbstate.cr4);
-       /* clear PGE */
-       native_write_cr4(cr4 & ~X86_CR4_PGE);
-       /* write old PGE again and flush TLBs */
-       native_write_cr4(cr4);
-}
-
+/*
+ * flush everything
+ */
 static inline void __native_flush_tlb_global(void)
 {
-       unsigned long flags;
+       unsigned long cr4, flags;
 
        if (static_cpu_has(X86_FEATURE_INVPCID)) {
                /*
@@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void)
         */
        raw_local_irq_save(flags);
 
-       __native_flush_tlb_global_irq_disabled();
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       /* toggle PGE */
+       native_write_cr4(cr4 ^ X86_CR4_PGE);
+       /* write old PGE again and flush TLBs */
+       native_write_cr4(cr4);
 
        raw_local_irq_restore(flags);
 }
 
+/*
+ * flush one page in the user mapping
+ */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
 
+/*
+ * flush everything
+ */
 static inline void __flush_tlb_all(void)
 {
-       if (boot_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
-       else
+       } else {
+               /*
+                * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+                */
                __flush_tlb();
+       }
 
        /*
         * Note: if we somehow had PCID but not PGE, then this wouldn't work --
@@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void)
         */
 }
 
+/*
+ * flush one page in the kernel mapping
+ */
 static inline void __flush_tlb_one(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
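
kern_pcid() and the two CR3 builders above are consumed by the context-switch path: build_cr3() when the TLB contents tagged with the new ASID must be discarded, build_cr3_noflush() when they are known to be current (which implies PCID is enabled, per the VM_WARN_ON_ONCE above). A minimal sketch of such a call site (switch_mm_irqs_off() itself is outside this diff; need_flush stands in for its tlb_gen comparison):

static void example_load_new_mm_cr3(pgd_t *pgd, u16 new_asid, bool need_flush)
{
	if (need_flush) {
		/* Bit 63 clear: TLB entries tagged with this PCID are dropped. */
		write_cr3(build_cr3(pgd, new_asid));
	} else {
		/* CR3_NOFLUSH (bit 63) set: keep the cached translations. */
		write_cr3(build_cr3_noflush(pgd, new_asid));
	}
}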
index 1fadd310ff680ece697fa65a8db410c380a8547e..31051f35cbb768e452c4f76a60c5415a45f572e7 100644 (file)
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
index e9cc6fe1fc6f953c38ddcc61fcf06fd90d72ab04..c1688c2d0a128f063053697dc60bcbfbca509765 100644 (file)
@@ -7,6 +7,9 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
 struct unwind_state {
        struct stack_info stack_info;
        unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+/*
+ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
+ * only the iret frame registers are accessible.  Use with caution!
+ */
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
        if (unwind_done(state))
index 8ea78275480dafeb702e11ba73364cd9e7c52f21..676b7cf4b62bf84a72da2cf690efac745f124e77 100644 (file)
@@ -93,4 +93,10 @@ void common(void) {
 
        BLANK();
        DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+       OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+       DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 }
index dedf428b20b68b0a4748fc1ac3032193c9121362..fa1261eefa16e73cedf27aadb878753be693f919 100644 (file)
@@ -47,13 +47,8 @@ void foo(void)
        BLANK();
 
        /* Offset from the sysenter stack to tss.sp0 */
-       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-              offsetofend(struct tss_struct, SYSENTER_stack));
-
-       /* Offset from cpu_tss to SYSENTER_stack */
-       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-       /* Size of SYSENTER_stack */
-       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+              offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
 #ifdef CONFIG_CC_STACKPROTECTOR
        BLANK();
index 630212fa9b9da3f0498fc30d4c193c5926c43abb..bf51e51d808dd8914abd3b4bca69b37ce3ec023b 100644 (file)
@@ -23,6 +23,9 @@ int main(void)
 #ifdef CONFIG_PARAVIRT
        OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
        OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+       OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
        BLANK();
 #endif
 
@@ -63,6 +66,7 @@ int main(void)
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
        OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+       OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
        BLANK();
 
 #ifdef CONFIG_CC_STACKPROTECTOR
index fa998ca8aa5aa5b4899dbe8a57c5b543f927009e..c9757f07d738af73ce3bd14c51780c71a512395f 100644 (file)
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
        return NULL;            /* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
        load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
-       pgprot_t prot = PAGE_KERNEL_RO;
-#else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-        * our double fault handler uses a task gate, and entering through
-        * a task gate needs to change an available TSS to busy.  If the GDT
-        * is read-only, that will triple fault.
-        *
-        * On Xen PV, the GDT must be read-only because the hypervisor requires
-        * it.
-        */
-       pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
-       __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
        int i;
 
-       for (i = 0; i < NCAPINTS; i++) {
+       for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
                c->x86_capability[i] &= ~cpu_caps_cleared[i];
                c->x86_capability[i] |= cpu_caps_set[i];
        }
@@ -1250,7 +1245,7 @@ void enable_sep_cpu(void)
                return;
 
        cpu = get_cpu();
-       tss = &per_cpu(cpu_tss, cpu);
+       tss = &per_cpu(cpu_tss_rw, cpu);
 
        /*
         * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1254,7 @@ void enable_sep_cpu(void)
 
        tss->x86_tss.ss1 = __KERNEL_CS;
        wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-       wrmsr(MSR_IA32_SYSENTER_ESP,
-             (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-             0);
-
+       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
        put_cpu();
@@ -1357,25 +1348,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+       extern char _entry_trampoline[];
+       extern char entry_SYSCALL_64_trampoline[];
+
+       int cpu = smp_processor_id();
+       unsigned long SYSCALL64_entry_trampoline =
+               (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+               (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
        wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+       wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
 
 #ifdef CONFIG_IA32_EMULATION
        wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1371,7 @@ void syscall_init(void)
         * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
         */
        wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
        wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
        wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1515,7 @@ void cpu_init(void)
        if (cpu)
                load_ucode_ap();
 
-       t = &per_cpu(cpu_tss, cpu);
+       t = &per_cpu(cpu_tss_rw, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1554,7 @@ void cpu_init(void)
         * set up and load the per-CPU TSS
         */
        if (!oist->ist[0]) {
-               char *estacks = per_cpu(exception_stacks, cpu);
+               char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        estacks += exception_stack_sizes[v];
@@ -1580,7 +1565,7 @@ void cpu_init(void)
                }
        }
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
        /*
         * <= is required because the CPU will access up to
@@ -1596,11 +1581,12 @@ void cpu_init(void)
        enter_lazy_tlb(&init_mm, me);
 
        /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
+        * Initialize the TSS.  sp0 points to the entry trampoline stack
+        * regardless of what task is running.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
+       load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
        load_mm_ldt(&init_mm);
 
@@ -1612,7 +1598,6 @@ void cpu_init(void)
        if (is_uv_system())
                uv_cpu_init();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1607,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
        wait_for_master_cpu(cpu);
 
@@ -1657,12 +1642,12 @@ void cpu_init(void)
         * Initialize the TSS.  Don't bother initializing sp0, as the initial
         * task never enters user mode.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
 
        load_mm_ldt(&init_mm);
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
        /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1659,6 @@ void cpu_init(void)
 
        fpu__init_cpu();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 #endif
index 7dbcb7adf7975f7f29c38651c23c478ad315a34c..8ccdca6d3f9e9b876ee27f021ed8c021b1168220 100644 (file)
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-       __native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
        if (rev != mc->hdr.rev)
                return -1;
 
-#ifdef CONFIG_X86_64
-       /* Flush global tlb. This is precaution. */
-       flush_tlb_early();
-#endif
        uci->cpu_sig.rev = rev;
 
        if (early)
index 0e662c55ae902fedd5c78c1ed87a972b35a79856..0b8cedb20d6d92f2875a49292680c8cfecd5b044 100644 (file)
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
                cpu_relax();
 }
 
-struct tss_struct doublefault_tss __cacheline_aligned = {
-       .x86_tss = {
-               .sp0            = STACK_START,
-               .ss0            = __KERNEL_DS,
-               .ldt            = 0,
-               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
-               .ip             = (unsigned long) doublefault_fn,
-               /* 0x2 bit is always set */
-               .flags          = X86_EFLAGS_SF | 0x2,
-               .sp             = STACK_START,
-               .es             = __USER_DS,
-               .cs             = __KERNEL_CS,
-               .ss             = __KERNEL_DS,
-               .ds             = __USER_DS,
-               .fs             = __KERNEL_PERCPU,
-
-               .__cr3          = __pa_nodebug(swapper_pg_dir),
-       }
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+       .sp0            = STACK_START,
+       .ss0            = __KERNEL_DS,
+       .ldt            = 0,
+       .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+
+       .ip             = (unsigned long) doublefault_fn,
+       /* 0x2 bit is always set */
+       .flags          = X86_EFLAGS_SF | 0x2,
+       .sp             = STACK_START,
+       .es             = __USER_DS,
+       .cs             = __KERNEL_CS,
+       .ss             = __KERNEL_DS,
+       .ds             = __USER_DS,
+       .fs             = __KERNEL_PERCPU,
+
+       .__cr3          = __pa_nodebug(swapper_pg_dir),
 };
 
 /* dummy for do_double_fault() call */
index f13b4c00a5de4b7a7b36c40d27311672bcc9d05c..36b17e0febe8629a9dde305625fccfc723943969 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
        return true;
 }
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+       struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+       void *begin = ss;
+       void *end = ss + 1;
+
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+
+       info->type      = STACK_TYPE_ENTRY;
+       info->begin     = begin;
+       info->end       = end;
+       info->next_sp   = NULL;
+
+       return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
                                 char *log_lvl)
 {
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
        printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+void show_iret_regs(struct pt_regs *regs)
+{
+       printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+       printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+               regs->sp, regs->flags);
+}
+
+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+{
+       if (on_stack(info, regs, sizeof(*regs)))
+               __show_regs(regs, 0);
+       else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+                         IRET_FRAME_SIZE)) {
+               /*
+                * When an interrupt or exception occurs in entry code, the
+                * full pt_regs might not have been saved yet.  In that case
+                * just print the iret frame.
+                */
+               show_iret_regs(regs);
+       }
+}
+
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl)
 {
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         * - task stack
         * - interrupt stack
         * - HW exception stacks (double fault, nmi, debug, mce)
+        * - entry stack
         *
-        * x86-32 can have up to three stacks:
+        * x86-32 can have up to four stacks:
         * - task stack
         * - softirq stack
         * - hardirq stack
+        * - entry stack
         */
        for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
                const char *stack_name;
 
-               /*
-                * If we overflowed the task stack into a guard page, jump back
-                * to the bottom of the usable stack.
-                */
-               if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
-                       stack = task_stack_page(task);
-
-               if (get_stack_info(stack, task, &stack_info, &visit_mask))
-                       break;
+               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+                       /*
+                        * We weren't on a valid stack.  It's possible that
+                        * we overflowed a valid stack into a guard page.
+                        * See if the next page up is valid so that we can
+                        * generate some kind of backtrace if this happens.
+                        */
+                       stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+                       if (get_stack_info(stack, task, &stack_info, &visit_mask))
+                               break;
+               }
 
                stack_name = stack_type_name(stack_info.type);
                if (stack_name)
                        printk("%s <%s>\n", log_lvl, stack_name);
 
-               if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                       __show_regs(regs, 0);
+               if (regs)
+                       show_regs_safe(&stack_info, regs);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
                        /*
                         * Don't print regs->ip again if it was already printed
-                        * by __show_regs() below.
+                        * by show_regs_safe() below.
                         */
                        if (regs && stack == &regs->ip)
                                goto next;
@@ -155,8 +200,8 @@ next:
 
                        /* if the frame has entry regs, print them */
                        regs = unwind_get_entry_regs(&state);
-                       if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                               __show_regs(regs, 0);
+                       if (regs)
+                               show_regs_safe(&stack_info, regs);
                }
 
                if (stack_name)
index daefae83a3aa86c59602b75bd3e6734c6e3b1030..04170f63e3a1d567caac3deea641e014b7e10823 100644 (file)
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_SOFTIRQ)
                return "SOFTIRQ";
 
+       if (type == STACK_TYPE_ENTRY)
+               return "ENTRY_TRAMPOLINE";
+
        return NULL;
 }
 
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (task != current)
                goto unknown;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        if (in_hardirq_stack(stack, info))
                goto recursion_check;
 
index 88ce2ffdb110303502ad33e64d357d8af5afd8c6..563e28d14f2ca157178d9de3a139d8370aaf89fe 100644 (file)
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_IRQ)
                return "IRQ";
 
+       if (type == STACK_TYPE_ENTRY) {
+               /*
+                * On 64-bit, we have a generic entry stack that we
+                * use for all the kernel entry points, including
+                * SYSENTER.
+                */
+               return "ENTRY_TRAMPOLINE";
+       }
+
        if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
                return exception_stack_names[type - STACK_TYPE_EXCEPTION];
 
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (in_irq_stack(stack, info))
                goto recursion_check;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        goto unknown;
 
 recursion_check:
index 3feb648781c470a7a49ee26749712ba7da891fe9..2f723301eb58fc5ad0d6796b342446ae2ee0c9e6 100644 (file)
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
         * because the ->io_bitmap_max value must match the bitmap
         * contents:
         */
-       tss = &per_cpu(cpu_tss, get_cpu());
+       tss = &per_cpu(cpu_tss_rw, get_cpu());
 
        if (turn_on)
                bitmap_clear(t->io_bitmap_ptr, from, num);
index 49cfd9fe7589fa5ef2bef5d4a5d6431b7007836f..68e1867cca8045d0ed728ffc6b75a866c25484ed 100644 (file)
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
        /* high bit used in ret_from_ code  */
        unsigned vector = ~regs->orig_ax;
 
-       /*
-        * NB: Unlike exception entries, IRQ entries do not reliably
-        * handle context tracking in the low-level entry code.  This is
-        * because syscall entries execute briefly with IRQs on before
-        * updating context tracking state, so we can take an IRQ from
-        * kernel mode with CONTEXT_USER.  The low-level entry code only
-        * updates the context if we came from user mode, so we won't
-        * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
-        * code is cleaned up enough that we can cleanly defer enabling
-        * IRQs.
-        */
-
        entering_irq();
 
        /* entering_irq() tells RCU that we're not quiescent.  Check it. */
index 020efbf5786b35d343a8632cd14ac4f800465d9b..d86e344f5b3debfed504b72a7c0f83f36fe16387 100644 (file)
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
        if (regs->sp >= estack_top && regs->sp <= estack_bottom)
                return;
 
-       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
                current->comm, curbase, regs->sp,
                irq_stack_top, irq_stack_bottom,
-               estack_top, estack_bottom);
+               estack_top, estack_bottom, (void *)regs->ip);
 
        if (sysctl_panic_on_stackoverflow)
                panic("low stack detected by irq handler - check messages\n");
index 1c1eae9613406b14c3154065e1fd036f985a384c..a6b5d62f45a737b84124411f56a64fdd869ca4f8 100644 (file)
@@ -5,6 +5,11 @@
  * Copyright (C) 2002 Andi Kleen
  *
  * This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ *     context.ldt_usr_sem
+ *       mmap_sem
+ *         context.lock
  */
 
 #include <linux/errno.h>
@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
 static void flush_ldt(void *__mm)
 {
        struct mm_struct *mm = __mm;
@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
        paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
-                       struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 {
+       mutex_lock(&mm->context.lock);
+
        /* Synchronizes with READ_ONCE in load_mm_ldt. */
-       smp_store_release(&current_mm->context.ldt, ldt);
+       smp_store_release(&mm->context.ldt, ldt);
 
-       /* Activate the LDT for all CPUs using current_mm. */
-       on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+       /* Activate the LDT for all CPUs using current's mm. */
+       on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+       mutex_unlock(&mm->context.lock);
 }
 
 static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 }
 
 /*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state;
+ * the new task is not running, so nothing can be installed.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 {
        struct ldt_struct *new_ldt;
-       struct mm_struct *old_mm;
        int retval = 0;
 
-       mutex_init(&mm->context.lock);
-       old_mm = current->mm;
-       if (!old_mm) {
-               mm->context.ldt = NULL;
+       if (!old_mm)
                return 0;
-       }
 
        mutex_lock(&old_mm->context.lock);
-       if (!old_mm->context.ldt) {
-               mm->context.ldt = NULL;
+       if (!old_mm->context.ldt)
                goto out_unlock;
-       }
 
        new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
        if (!new_ldt) {
@@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
        unsigned long entries_size;
        int retval;
 
-       mutex_lock(&mm->context.lock);
+       down_read(&mm->context.ldt_usr_sem);
 
        if (!mm->context.ldt) {
                retval = 0;
@@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
        retval = bytecount;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_read(&mm->context.ldt_usr_sem);
        return retval;
 }
 
@@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
                        ldt.avl = 0;
        }
 
-       mutex_lock(&mm->context.lock);
+       if (down_write_killable(&mm->context.ldt_usr_sem))
+               return -EINTR;
 
        old_ldt       = mm->context.ldt;
        old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
        error = 0;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_write(&mm->context.ldt_usr_sem);
 out:
        return error;
 }
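
The read_ldt()/write_ldt() paths above back the modify_ldt(2) system call; the new ldt_usr_sem serializes concurrent callers, while context.lock is now taken only around the actual install. As a purely illustrative user-space sketch of what those paths serve (example descriptor values, not taken from this patch, and assuming modify_ldt() is not disabled on the running kernel):

#include <asm/ldt.h>        /* struct user_desc, LDT_ENTRIES, LDT_ENTRY_SIZE */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number   = 0;        /* first LDT slot (example) */
	desc.base_addr      = 0;        /* example: flat base */
	desc.limit          = 0xfffff;
	desc.seg_32bit      = 1;
	desc.limit_in_pages = 1;
	desc.useable        = 1;

	/* func 0x11 = write_ldt() in "new mode"; serialized by ldt_usr_sem */
	if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt(write)");
		return 1;
	}

	/* func 0 = read_ldt(); takes ldt_usr_sem for reading */
	char buf[LDT_ENTRIES * LDT_ENTRY_SIZE];
	long n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	printf("read_ldt returned %ld bytes\n", n);
	return n < 0;
}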
index ac0be8283325edfdc2752f862b4c0cef208a931c..9edadabf04f66c657f8a29bb56fe994b2559d5cf 100644 (file)
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
-               PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
index bb988a24db927d758f9120d45f90d1c160628790..aed9d94bd46f41bb049b8e0153a44a43d97e80b4 100644 (file)
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
        .x86_tss = {
                /*
                 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
                 * Poison it.
                 */
                .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+               /*
+                * .sp1 is cpu_current_top_of_stack.  The init task never
+                * runs user code, but cpu_current_top_of_stack should still
+                * be well defined before the first context switch.
+                */
+               .sp1 = TOP_OF_INIT_STACK,
+#endif
+
 #ifdef CONFIG_X86_32
                .ss0 = __KERNEL_DS,
                .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
          */
        .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
-#ifdef CONFIG_X86_32
-       .SYSENTER_stack_canary  = STACK_END_MAGIC,
-#endif
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
 DEFINE_PER_CPU(bool, __tss_limit_invalid);
 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
        struct fpu *fpu = &t->fpu;
 
        if (bp) {
-               struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+               struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
 
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
index 45bf0c5f93e15103060d67d5245756ab72ce8fe5..5224c609918416337b97440eb2d515d8052463ae 100644 (file)
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
index eeeb34f85c250e8c01188b6d32cf5a62bd1af8a0..c754662320163107ca3a254362ce0e404a8d3c11 100644 (file)
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
 
-       printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
-       printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
-               regs->sp, regs->flags);
+       show_iret_regs(regs);
+
        if (regs->orig_ax != -1)
                pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
        else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
        printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);
 
+       if (!all)
+               return;
+
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
-       if (!all)
-               return;
-
        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
                     this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Switch the PDA and FPU contexts.
         */
        this_cpu_write(current_task, next_p);
+       this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
        /* Reload sp0. */
        update_sp0(next_p);
index 05a97d5fe29840e387a66aa00f5041d8c749ad17..c5970efa85570ab324bd1cad2e57d464dba86f46 100644 (file)
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
 
 /* Flag to indicate if a complete sched domain rebuild is required */
 bool x86_topology_update;
@@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
 
-       /*
-        * Enable the espfix hack for this CPU
-       */
-#ifdef CONFIG_X86_ESPFIX64
+       /* Enable the espfix hack for this CPU */
        init_espfix_ap(cpu);
-#endif
 
        /* So we see what's up */
        announce_cpu(cpu, apicid);
@@ -1304,7 +1300,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
         * Today neither Intel nor AMD support heterogenous systems so
         * extrapolate the boot cpu's data to all packages.
         */
-       ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+       ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
        __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
        pr_info("Max logical packages: %u\n", __max_logical_packages);
 
index 989514c94a55d8fa93a07192edd199be1a607bf8..f69dbd47d7332f4af7e5f274bb6aa9736f3014bd 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
@@ -348,9 +349,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
        /*
         * If IRET takes a non-IST fault on the espfix64 stack, then we
-        * end up promoting it to a doublefault.  In that case, modify
-        * the stack to make it look like we just entered the #GP
-        * handler from user space, similar to bad_iret.
+        * end up promoting it to a doublefault.  In that case, take
+        * advantage of the fact that we're not using the normal (TSS.sp0)
+        * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
+        * and then modify our own IRET frame so that, when we return,
+        * we land directly at the #GP(0) vector with the stack already
+        * set up according to its expectations.
+        *
+        * The net result is that our #GP handler will think that we
+        * entered from usermode with the bad user context.
         *
         * No need for ist_enter here because we don't use RCU.
         */
@@ -358,13 +365,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
                regs->cs == __KERNEL_CS &&
                regs->ip == (unsigned long)native_irq_return_iret)
        {
-               struct pt_regs *normal_regs = task_pt_regs(current);
+               struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
-               /* Fake a #GP(0) from userspace. */
-               memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
-               normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+               /*
+                * regs->sp points to the failing IRET frame on the
+                * ESPFIX64 stack.  Copy it to the entry stack.  This fills
+                * in gpregs->ss through gpregs->ip.
+                *
+                */
+               memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+               gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
+
+               /*
+                * Adjust our frame so that we return straight to the #GP
+                * vector with the expected RSP value.  This is safe because
+                * we won't enable interrupts or schedule before we invoke
+                * general_protection, so nothing will clobber the stack
+                * frame we just set up.
+                */
                regs->ip = (unsigned long)general_protection;
-               regs->sp = (unsigned long)&normal_regs->orig_ax;
+               regs->sp = (unsigned long)&gpregs->orig_ax;
 
                return;
        }
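
The 5*8 bytes copied above are the hardware IRET frame (ip, cs, flags, sp, ss), which is exactly five quadwords and forms the tail of struct pt_regs; that is why the copy into &gpregs->ip fills gpregs->ss through gpregs->ip. A stand-alone sketch of that layout arithmetic, using a simplified stand-in for the real pt_regs rather than the kernel header:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified mimic of the x86-64 struct pt_regs layout (illustration only). */
struct fake_pt_regs {
	uint64_t r15, r14, r13, r12, bp, bx;
	uint64_t r11, r10, r9, r8, ax, cx, dx, si, di;
	uint64_t orig_ax;
	/* Hardware IRET frame: */
	uint64_t ip, cs, flags, sp, ss;
};

int main(void)
{
	/* Five 8-byte words from ip to the end cover ip, cs, flags, sp, ss. */
	_Static_assert(sizeof(struct fake_pt_regs) -
		       offsetof(struct fake_pt_regs, ip) == 5 * 8,
		       "IRET frame is five qwords");

	/* "gpregs" sits one full frame below sp0, as in the double-fault path. */
	char stack[2 * sizeof(struct fake_pt_regs)];
	void *sp0 = stack + sizeof(stack);
	struct fake_pt_regs *gpregs = (struct fake_pt_regs *)sp0 - 1;

	printf("IRET frame spans [%p, %p)\n", (void *)&gpregs->ip, sp0);
	return 0;
}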
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         *
         *   Processors update CR2 whenever a page fault is detected. If a
         *   second page fault occurs while an earlier page fault is being
-        *   delivered, the faulting linear address of the second fault will
+        *   delivered, the faulting linear address of the second fault will
         *   overwrite the contents of CR2 (replacing the previous
         *   address). These updates to CR2 occur even if the page fault
         *   results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
  */
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-       struct pt_regs *regs = task_pt_regs(current);
-       *regs = *eregs;
+       struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+       if (regs != eregs)
+               *regs = *eregs;
        return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
-        * correctly, we want move our stack frame to task_pt_regs
-        * and we want to pretend that the exception came from the
-        * iret target.
+        * correctly, we want to move our stack frame to where it would
+        * be had we entered directly on the entry stack (rather than
+        * just below the IRET frame) and we want to pretend that the
+        * exception came from the IRET target.
         */
        struct bad_iret_stack *new_stack =
-               container_of(task_pt_regs(current),
-                            struct bad_iret_stack, regs);
+               (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
        /* Copy the IRET target to the new stack. */
        memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
        debug_stack_usage_dec();
 
 exit:
-#if defined(CONFIG_X86_32)
-       /*
-        * This is the most likely code path that involves non-trivial use
-        * of the SYSENTER stack.  Check that we haven't overrun it.
-        */
-       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
-            "Overran or corrupted SYSENTER stack\n");
-#endif
        ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
+       /* Init cpu_entry_area before IST entries are set up */
+       setup_cpu_entry_areas();
+
        idt_setup_traps();
 
        /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
         * "sidt" instruction will not leak the location of the kernel, and
         * to defend the IDT against arbitrary memory write vulnerabilities.
         * It will be reloaded in cpu_init() */
-       __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
-       idt_descr.address = fix_to_virt(FIX_RO_IDT);
+       cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+                   PAGE_KERNEL_RO);
+       idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
 
        /*
         * Should be a barrier for any external CPU state:
index a3f973b2c97a03b121fe0173dbdc9298216721e6..be86a865087a6b9dc8e04031dbf2e2fbeeda1ed5 100644 (file)
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        return NULL;
 }
 
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
                            size_t len)
 {
        struct stack_info *info = &state->stack_info;
+       void *addr = (void *)_addr;
 
-       /*
-        * If the address isn't on the current stack, switch to the next one.
-        *
-        * We may have to traverse multiple stacks to deal with the possibility
-        * that info->next_sp could point to an empty stack and the address
-        * could be on a subsequent stack.
-        */
-       while (!on_stack(info, (void *)addr, len))
-               if (get_stack_info(info->next_sp, state->task, info,
-                                  &state->stack_mask))
-                       return false;
+       if (!on_stack(info, addr, len) &&
+           (get_stack_info(addr, state->task, info, &state->stack_mask)))
+               return false;
 
        return true;
 }
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
        return true;
 }
 
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
-                            unsigned long *ip, unsigned long *sp, bool full)
+                            unsigned long *ip, unsigned long *sp)
 {
-       size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
-       size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
-       struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
-       if (IS_ENABLED(CONFIG_X86_64)) {
-               if (!stack_access_ok(state, addr, regs_size))
-                       return false;
+       struct pt_regs *regs = (struct pt_regs *)addr;
 
-               *ip = regs->ip;
-               *sp = regs->sp;
+       /* x86-32 support will be more complicated due to the &regs->sp hack */
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
 
-               return true;
-       }
-
-       if (!stack_access_ok(state, addr, sp_offset))
+       if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
                return false;
 
        *ip = regs->ip;
+       *sp = regs->sp;
+       return true;
+}
 
-       if (user_mode(regs)) {
-               if (!stack_access_ok(state, addr + sp_offset,
-                                    REGS_SIZE - SP_OFFSET))
-                       return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+                                 unsigned long *ip, unsigned long *sp)
+{
+       struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
 
-               *sp = regs->sp;
-       } else
-               *sp = (unsigned long)&regs->sp;
+       if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+               return false;
 
+       *ip = regs->ip;
+       *sp = regs->sp;
        return true;
 }
 
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
        unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
        enum stack_type prev_type = state->stack_info.type;
        struct orc_entry *orc;
-       struct pt_regs *ptregs;
        bool indirect = false;
 
        if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+               if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS_IRET:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+               if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference iret registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
                }
 
-               ptregs = container_of((void *)sp, struct pt_regs, ip);
-               if ((unsigned long)ptregs >= prev_sp &&
-                   on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
-                       state->regs = ptregs;
-                       state->full_regs = false;
-               } else
-                       state->regs = NULL;
-
+               state->regs = (void *)sp - IRET_FRAME_OFFSET;
+               state->full_regs = false;
                state->signal = true;
                break;
 
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        }
 
        if (get_stack_info((unsigned long *)state->sp, state->task,
-                          &state->stack_info, &state->stack_mask))
-               return;
+                          &state->stack_info, &state->stack_mask)) {
+               /*
+                * We weren't on a valid stack.  It's possible that
+                * we overflowed a valid stack into a guard page.
+                * See if the next page up is valid so that we can
+                * generate some kind of backtrace if this happens.
+                */
+               void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+               if (get_stack_info(next_page, state->task, &state->stack_info,
+                                  &state->stack_mask))
+                       return;
+       }
 
        /*
         * The caller can provide the address of the first frame directly
index a4009fb9be8725ce7bda96cd5e8160e524903266..d2a8b5a24a44a554e2f81f3b30309ef39aba0d8a 100644 (file)
@@ -107,6 +107,15 @@ SECTIONS
                SOFTIRQENTRY_TEXT
                *(.fixup)
                *(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+               . = ALIGN(PAGE_SIZE);
+               _entry_trampoline = .;
+               *(.entry_trampoline)
+               . = ALIGN(PAGE_SIZE);
+               ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
                /* End of text section */
                _etext = .;
        } :text = 0x9090
index abe74f779f9d793e9a6c2f19417f23b5aa7ce484..b514b2b2845a334d4b53f28ed0b73c96f12d0e6a 100644 (file)
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
 }
 
 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-                                    u64 cr0, u64 cr4)
+                                   u64 cr0, u64 cr3, u64 cr4)
 {
        int bad;
+       u64 pcid;
+
+       /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+       pcid = 0;
+       if (cr4 & X86_CR4_PCIDE) {
+               pcid = cr3 & 0xfff;
+               cr3 &= ~0xfff;
+       }
+
+       bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+       if (bad)
+               return X86EMUL_UNHANDLEABLE;
 
        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
+               if (pcid) {
+                       bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+                       if (bad)
+                               return X86EMUL_UNHANDLEABLE;
+               }
+
        }
 
        return X86EMUL_CONTINUE;
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
        struct desc_struct desc;
        struct desc_ptr dt;
        u16 selector;
-       u32 val, cr0, cr4;
+       u32 val, cr0, cr3, cr4;
        int i;
 
        cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
-       ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+       cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
        ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
        ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
 
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
 
-       return rsm_enter_protected_mode(ctxt, cr0, cr4);
+       return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
 }
 
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 {
        struct desc_struct desc;
        struct desc_ptr dt;
-       u64 val, cr0, cr4;
+       u64 val, cr0, cr3, cr4;
        u32 base3;
        u16 selector;
        int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
 
        cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
-       ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+       cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
        cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
        val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
        ctxt->ops->set_gdt(ctxt, &dt);
 
-       r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+       r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
        if (r != X86EMUL_CONTINUE)
                return r;
 
index e5e66e5c664057bb5cc5ad2660008ccbf19b69e5..c4deb1f34faa6ce7ffe6bcaaebddc3e87b2a9a69 100644 (file)
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if(make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, 0, 0,
                                vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        spin_lock(&vcpu->kvm->mmu_lock);
                        if (make_mmu_pages_available(vcpu) < 0) {
                                spin_unlock(&vcpu->kvm->mmu_lock);
-                               return 1;
+                               return -ENOSPC;
                        }
                        sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
                                        i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
                                      0, ACC_ALL);
index 8eba631c4dbd509d8687c6135e8dba267042f5e0..023afa0c8887002d6a79a8b121b46996feec1a61 100644 (file)
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 * processors.  See 22.2.4.
                 */
                vmcs_writel(HOST_TR_BASE,
-                           (unsigned long)this_cpu_ptr(&cpu_tss));
+                           (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
                vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
                /*
index faf843c9b916ead0992d0b155a138c6afdf7ae57..1cec2c62a0b08405d2bd7c8908d6b7f33de3b63c 100644 (file)
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
                                         addr, n, v))
                    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
-               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
                handled += n;
                addr += n;
                len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 {
        if (vcpu->mmio_read_completed) {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-                              vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+                              vcpu->mmio_fragments[0].gpa, val);
                vcpu->mmio_read_completed = 0;
                return 1;
        }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
        return vcpu_mmio_write(vcpu, gpa, bytes, val);
 }
 
 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
                          void *val, int bytes)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
        return X86EMUL_IO_NEEDED;
 }
 
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       struct fpu *fpu = &current->thread.fpu;
        int r;
 
-       fpu__initialize(fpu);
-
        kvm_sigset_activate(vcpu);
 
+       kvm_load_guest_fpu(vcpu);
+
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                        r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                }
        }
 
-       kvm_load_guest_fpu(vcpu);
-
        if (unlikely(vcpu->arch.complete_userspace_io)) {
                int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
                vcpu->arch.complete_userspace_io = NULL;
                r = cui(vcpu);
                if (r <= 0)
-                       goto out_fpu;
+                       goto out;
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        else
                r = vcpu_run(vcpu);
 
-out_fpu:
-       kvm_put_guest_fpu(vcpu);
 out:
+       kvm_put_guest_fpu(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
 
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_set_rflags(vcpu, regs->rflags);
+       kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
 
        vcpu->arch.exception.pending = false;
 
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+       if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
+               /*
+                * When EFER.LME and CR0.PG are set, the processor is in
+                * 64-bit mode (though maybe in a 32-bit code segment).
+                * CR4.PAE and EFER.LMA must be set.
+                */
+               if (!(sregs->cr4 & X86_CR4_PAE_BIT)
+                   || !(sregs->efer & EFER_LMA))
+                       return -EINVAL;
+       } else {
+               /*
+                * Not in 64-bit mode: EFER.LMA is clear and the code
+                * segment cannot be 64-bit.
+                */
+               if (sregs->efer & EFER_LMA || sregs->cs.l)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
 {
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                        (sregs->cr4 & X86_CR4_OSXSAVE))
                return -EINVAL;
 
+       if (kvm_valid_sregs(vcpu, sregs))
+               return -EINVAL;
+
        apic_base_msr.data = sregs->apic_base;
        apic_base_msr.host_initiated = true;
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
index 553f8fd23cc4733d0edafa862b95446f7a04bab1..4846eff7e4c8b1505501d7f1dcb64127d0a4c67c 100644 (file)
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
                delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
 
                /*
-                * Use cpu_tss as a cacheline-aligned, seldomly
+                * Use cpu_tss_rw as a cacheline-aligned, seldom
                 * accessed per-cpu variable as the monitor target.
                 */
-               __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+               __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
                /*
                 * AMD, like Intel, supports the EAX hint and EAX=0xf
index c4d55919fac19e06afbb00a4124fbf1b334b4d46..e0b85930dd773e87417e2b4957b8af61221b04c0 100644 (file)
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
index 8e13b8cc6bedb0dc84eea64cd80ca6ae39037eaa..52195ee3f6d50ebd2005aa040b1cf0023edd6b33 100644 (file)
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o   = -pg
 endif
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644 (file)
index 0000000..fe814fd
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+       unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+       BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+       return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+       unsigned long va = (unsigned long) cea_vaddr;
+
+       set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+       for ( ; pages; pages--, cea_vaddr += PAGE_SIZE, ptr += PAGE_SIZE)
+               cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+/* Set up the fixmap mappings only once per processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+
+       /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
+       pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+        * a task gate needs to change an available TSS to busy.  If the
+        * GDT is read-only, that will triple fault.  The TSS cannot be
+        * read-only because the CPU writes to it on task switches.
+        *
+        * On Xen PV, the GDT must be read-only because the hypervisor
+        * requires it.
+        */
+       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+       pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+                   gdt_prot);
+
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+                            per_cpu_ptr(&entry_stack_storage, cpu), 1,
+                            PAGE_KERNEL);
+
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+        *
+        *  Avoid placing a page boundary in the part of the TSS that the
+        *  processor reads during a task switch (the first 104 bytes). The
+        *  processor may not correctly perform address translations if a
+        *  boundary occurs in this area. During a task switch, the processor
+        *  reads and writes into the first 104 bytes of each TSS (using
+        *  contiguous physical addresses beginning with the physical address
+        *  of the first byte of the TSS). So, after TSS access begins, if
+        *  part of the 104 bytes is not physically contiguous, the processor
+        *  will access incorrect information without generating a page-fault
+        *  exception.
+        *
+        * There are also a lot of errata involving the TSS spanning a page
+        * boundary.  Assert that we're not doing that.
+        */
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+                            &per_cpu(cpu_tss_rw, cpu),
+                            sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+                            &per_cpu(exception_stacks, cpu),
+                            sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+       unsigned long start, end;
+
+       BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+       BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+       start = CPU_ENTRY_AREA_BASE;
+       end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+       /* Careful here: start + PMD_SIZE might wrap around */
+       for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+               populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+       unsigned int cpu;
+
+       setup_cpu_entry_area_ptes();
+
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+}
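
get_cpu_entry_area() above is plain address arithmetic: each CPU gets a fixed-size slot at a constant stride from a common base, and the BUILD_BUG_ON insists that the slot size is a whole number of pages. A small user-space sketch of the same indexing scheme, with made-up example constants in place of the real CPU_ENTRY_AREA_* values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Example values only -- the real constants live in asm/cpu_entry_area.h. */
#define EXAMPLE_AREA_BASE  0xfffffe0000001000ULL
#define EXAMPLE_AREA_SIZE  (40 * 4096ULL)       /* must be a page multiple */

static uint64_t example_area_for_cpu(int cpu)
{
	/* Fixed-stride indexing, as in get_cpu_entry_area(). */
	return EXAMPLE_AREA_BASE + (uint64_t)cpu * EXAMPLE_AREA_SIZE;
}

int main(void)
{
	assert(EXAMPLE_AREA_SIZE % 4096 == 0);
	for (int cpu = 0; cpu < 4; cpu++)
		printf("cpu %d -> %#llx\n", cpu,
		       (unsigned long long)example_area_for_cpu(cpu));
	return 0;
}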
index 5e3ac6fe6c9e32ed1906f4f9bf736310a7193c7d..43dedbfb7257a32b538dc06773e13ddd55fd92dd 100644 (file)
@@ -44,10 +44,12 @@ struct addr_marker {
        unsigned long max_lines;
 };
 
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space marker hints */
+
+#ifdef CONFIG_X86_64
+
 enum address_markers_idx {
        USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
        KERNEL_SPACE_NR,
        LOW_KERNEL_NR,
        VMALLOC_START_NR,
@@ -56,56 +58,74 @@ enum address_markers_idx {
        KASAN_SHADOW_START_NR,
        KASAN_SHADOW_END_NR,
 #endif
-# ifdef CONFIG_X86_ESPFIX64
+       CPU_ENTRY_AREA_NR,
+#ifdef CONFIG_X86_ESPFIX64
        ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+       EFI_END_NR,
+#endif
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
-#else
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
+       [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
+#ifdef CONFIG_KASAN
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
+#endif
+       [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+#ifdef CONFIG_X86_ESPFIX64
+       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+       [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
+#endif
+       [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
+       [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
+       [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
+       [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+       USER_SPACE_NR = 0,
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
-# endif
-       FIXADDR_START_NR,
 #endif
+       CPU_ENTRY_AREA_NR,
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
 };
 
-/* Address space markers hints */
 static struct addr_marker address_markers[] = {
-       { 0, "User Space" },
-#ifdef CONFIG_X86_64
-       { 0x8000000000000000UL, "Kernel Space" },
-       { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
-       { KASAN_SHADOW_START,   "KASAN shadow" },
-       { KASAN_SHADOW_END,     "KASAN shadow end" },
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
 #endif
-# ifdef CONFIG_X86_ESPFIX64
-       { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
-       { EFI_VA_END,           "EFI Runtime Services" },
-# endif
-       { __START_KERNEL_map,   "High Kernel Mapping" },
-       { MODULES_VADDR,        "Modules" },
-       { MODULES_END,          "End Modules" },
-#else
-       { PAGE_OFFSET,          "Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/*VMALLOC_END*/,     "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
-       { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
-# endif
-       { 0/*FIXADDR_START*/,   "Fixmap Area" },
-#endif
-       { -1, NULL }            /* End of list */
+       [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
+       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
 };
 
+#endif /* !CONFIG_X86_64 */
+
 /* Multipliers for offsets within the PTEs */
 #define PTE_LEVEL_MULT (PAGE_SIZE)
 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
        static const char * const level_name[] =
                { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
-       if (!pgprot_val(prot)) {
+       if (!(pr & _PAGE_PRESENT)) {
                /* Not present */
                pt_dump_cont_printf(m, dmsg, "                              ");
        } else {
@@ -525,8 +545,8 @@ static int __init pt_dump_init(void)
        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 # endif
        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+       address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
 #endif
-
        return 0;
 }
 __initcall(pt_dump_init);
index febf6980e6535572f998cf2fa0ee63d296bdc6f1..06fe3d51d385b88111961c0b5addc673fcd597a2 100644 (file)
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), address,
                (void *)regs->ip, (void *)regs->sp, error_code);
index 8a64a6f2848d9be2e73a341f4d87ab2dc35de09f..135c9a7898c7da908f1340f9750774b4327e63b3 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/setup.h>
 #include <asm/set_memory.h>
 #include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/init.h>
 
 #include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
        printk(KERN_INFO "virtual kernel memory layout:\n"
                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+               "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
                "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
                FIXADDR_START, FIXADDR_TOP,
                (FIXADDR_TOP - FIXADDR_START) >> 10,
 
+               CPU_ENTRY_AREA_BASE,
+               CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+               CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
 #ifdef CONFIG_HIGHMEM
                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
                (LAST_PKMAP*PAGE_SIZE) >> 10,
index 6e4573b1da341bd41095b11a692afcba51e4b850..c45b6ec5357bcd2e9f6626bd738c700cccd0a173 100644 (file)
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
                return;
        }
 
+       mmiotrace_iounmap(addr);
+
        addr = (volatile void __iomem *)
                (PAGE_MASK & (unsigned long __force)addr);
 
-       mmiotrace_iounmap(addr);
-
        /* Use the vm area unlocked, assuming the caller
           ensures there isn't another iounmap for the same address
           in parallel. Reuse of the virtual address is prevented by
index 99dfed6dfef8b2f9028f82b89ab8dc2bde8173c4..47388f0c0e59649ca3574d4e7c31b356dad7d247 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
 void __init kasan_init(void)
 {
        int i;
+       void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
 #ifdef CONFIG_KASAN_INLINE
        register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
                map_range(&pfn_mapped[i]);
        }
 
+       shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+                                               PAGE_SIZE);
+
+       shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+                                       CPU_ENTRY_AREA_MAP_SIZE);
+       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+                                       PAGE_SIZE);
+
        kasan_populate_zero_shadow(
                kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-               kasan_mem_to_shadow((void *)__START_KERNEL_map));
+               shadow_cpu_entry_begin);
+
+       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+                             (unsigned long)shadow_cpu_entry_end, 0);
+
+       kasan_populate_zero_shadow(shadow_cpu_entry_end,
+                               kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
        kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
                              (unsigned long)kasan_mem_to_shadow(_end),
                              early_pfn_to_nid(__pa(_stext)));
 
        kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-                       (void *)KASAN_SHADOW_END);
+                               (void *)KASAN_SHADOW_END);
 
        load_cr3(init_top_pgt);
        __flush_tlb_all();
index c21c2ed046120c8e12d439e71848200f11c41bd5..58477ec3d66d08acf07c1bc21bb9a55a78fcaa28 100644 (file)
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
        unsigned long flags;
        int ret = 0;
        unsigned long size = 0;
+       unsigned long addr = p->addr & PAGE_MASK;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        unsigned int l;
        pte_t *pte;
 
        spin_lock_irqsave(&kmmio_lock, flags);
-       if (get_kmmio_probe(p->addr)) {
+       if (get_kmmio_probe(addr)) {
                ret = -EEXIST;
                goto out;
        }
 
-       pte = lookup_address(p->addr, &l);
+       pte = lookup_address(addr, &l);
        if (!pte) {
                ret = -EINVAL;
                goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
        kmmio_count++;
        list_add_rcu(&p->list, &kmmio_probes);
        while (size < size_lim) {
-               if (add_kmmio_fault_page(p->addr + size))
+               if (add_kmmio_fault_page(addr + size))
                        pr_err("Unable to set page fault.\n");
                size += page_level_size(l);
        }
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 {
        unsigned long flags;
        unsigned long size = 0;
+       unsigned long addr = p->addr & PAGE_MASK;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        struct kmmio_fault_page *release_list = NULL;
        struct kmmio_delayed_release *drelease;
        unsigned int l;
        pte_t *pte;
 
-       pte = lookup_address(p->addr, &l);
+       pte = lookup_address(addr, &l);
        if (!pte)
                return;
 
        spin_lock_irqsave(&kmmio_lock, flags);
        while (size < size_lim) {
-               release_kmmio_fault_page(p->addr + size, &release_list);
+               release_kmmio_fault_page(addr + size, &release_list);
                size += page_level_size(l);
        }
        list_del_rcu(&p->list);
index 6b9bf023a700559b87ae7ac89570d9bbd26d1f05..c3c5274410a908e762aed936406006d63c3116ac 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
index 3118392cdf756bfc913d7a4137d5f7e0d46b046d..0a1be3adc97eeefa1a45f12aa8b6e893162912b3 100644 (file)
@@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * isn't free.
         */
 #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
                 * the system so hard that we don't see the call trace.
@@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                if (need_flush) {
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-                       write_cr3(build_cr3(next, new_asid));
+                       write_cr3(build_cr3(next->pgd, new_asid));
 
                        /*
                         * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                } else {
                        /* The new ASID is already up to date. */
-                       write_cr3(build_cr3_noflush(next, new_asid));
+                       write_cr3(build_cr3_noflush(next->pgd, new_asid));
 
                        /* See above wrt _rcuidle. */
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
                !(cr4_read_shadow() & X86_CR4_PCIDE));
 
        /* Force ASID 0 and force a TLB flush. */
-       write_cr3(build_cr3(mm, 0));
+       write_cr3(build_cr3(mm->pgd, 0));
 
        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info)
 
        /* flush range by one by one 'invlpg' */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-               __flush_tlb_single(addr);
+               __flush_tlb_one(addr);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
index 1e996df687a3bc47fb796c4fe8025bcb148a6890..e663d6bf1328ebe2327c990f9d19557a49a2124a 100644 (file)
@@ -665,6 +665,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
        unsigned i;
        u32 base, limit, high;
        struct resource *res, *conflict;
+       struct pci_dev *other;
+
+       /* Check that we are the only device of that type */
+       other = pci_get_device(dev->vendor, dev->device, NULL);
+       if (other != dev ||
+           (other = pci_get_device(dev->vendor, dev->device, other))) {
+               /* This is a multi-socket system; don't touch it for now */
+               pci_dev_put(other);
+               return;
+       }
 
        for (i = 0; i < 8; i++) {
                pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base);
@@ -696,8 +706,13 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
        res->end = 0xfd00000000ull - 1;
 
        /* Just grab the free area behind system memory for this */
-       while ((conflict = request_resource_conflict(&iomem_resource, res)))
+       while ((conflict = request_resource_conflict(&iomem_resource, res))) {
+               if (conflict->end >= res->end) {
+                       kfree(res);
+                       return;
+               }
                res->start = conflict->end + 1;
+       }
 
        dev_info(&dev->dev, "adding root bus resource %pR\n", res);
 
@@ -714,10 +729,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 
        pci_bus_add_resource(dev->bus, res, 0);
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
 
 #endif
index f44c0bc95aa2f45ad42462a5f23f4db4672d1257..8538a6723171a5606058a8823ed1cbb2d343fdb6 100644 (file)
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
                local_flush_tlb();
                stat->d_alltlb++;
        } else {
-               __flush_tlb_one(msg->address);
+               __flush_tlb_single(msg->address);
                stat->d_onetlb++;
        }
        stat->d_requestee++;
index 5191de14f4df9aea4e452925cda5b9845d8d1bf4..a7d966964c6f20577c927cf5e618bc86b3331977 100644 (file)
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
        /*
         * descriptor tables
         */
-#ifdef CONFIG_X86_32
        store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-       store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
        /*
         * We save it here, but restore it only in the hibernate case.
         * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
        /*
         * segment registers
         */
-#ifdef CONFIG_X86_32
-       savesegment(es, ctxt->es);
-       savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
        savesegment(gs, ctxt->gs);
-       savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
-       asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
-       asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
-       asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
-       asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
-       asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+       savesegment(gs, ctxt->gs);
+       savesegment(fs, ctxt->fs);
+       savesegment(ds, ctxt->ds);
+       savesegment(es, ctxt->es);
 
        rdmsrl(MSR_FS_BASE, ctxt->fs_base);
-       rdmsrl(MSR_GS_BASE, ctxt->gs_base);
-       rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+       rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+       rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
        mtrr_save_fixed_ranges(NULL);
 
        rdmsrl(MSR_EFER, ctxt->efer);
@@ -160,17 +152,19 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
        struct desc_struct *desc = get_cpu_gdt_rw(cpu);
        tss_desc tss;
 #endif
-       set_tss_desc(cpu, t);   /*
-                                * This just modifies memory; should not be
-                                * necessary. But... This is necessary, because
-                                * 386 hardware has concept of busy TSS or some
-                                * similar stupidity.
-                                */
+
+       /*
+        * We need to reload TR, which requires that we change the
+        * GDT entry to indicate "available" first.
+        *
+        * XXX: This could probably all be replaced by a call to
+        * force_reload_TR().
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 
 #ifdef CONFIG_X86_64
        memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -178,6 +172,9 @@ static void fix_processor_context(void)
        write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
 
        syscall_init();                         /* This sets MSR_*STAR and related */
+#else
+       if (boot_cpu_has(X86_FEATURE_SEP))
+               enable_sep_cpu();
 #endif
        load_TR_desc();                         /* This does ltr */
        load_mm_ldt(current->active_mm);        /* This does lldt */
@@ -190,9 +187,12 @@ static void fix_processor_context(void)
 }
 
 /**
- *     __restore_processor_state - restore the contents of CPU registers saved
- *             by __save_processor_state()
- *     @ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ *                             by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
  */
 static void notrace __restore_processor_state(struct saved_context *ctxt)
 {
@@ -215,57 +215,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
        write_cr2(ctxt->cr2);
        write_cr0(ctxt->cr0);
 
+       /* Restore the IDT. */
+       load_idt(&ctxt->idt);
+
        /*
-        * now restore the descriptor tables to their proper values
-        * ltr is done i fix_processor_context().
+        * Just in case the asm code got us here with the SS, DS, or ES
+        * out of sync with the GDT, update them.
         */
-#ifdef CONFIG_X86_32
-       load_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
-       load_idt((const struct desc_ptr *)&ctxt->idt_limit);
-#endif
+       loadsegment(ss, __KERNEL_DS);
+       loadsegment(ds, __USER_DS);
+       loadsegment(es, __USER_DS);
 
-#ifdef CONFIG_X86_64
        /*
-        * We need GSBASE restored before percpu access can work.
-        * percpu access can happen in exception handlers or in complicated
-        * helpers like load_gs_index().
+        * Restore percpu access.  Percpu access can happen in exception
+        * handlers or in complicated helpers like load_gs_index().
         */
-       wrmsrl(MSR_GS_BASE, ctxt->gs_base);
+#ifdef CONFIG_X86_64
+       wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+#else
+       loadsegment(fs, __KERNEL_PERCPU);
+       loadsegment(gs, __KERNEL_STACK_CANARY);
 #endif
 
+       /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
        fix_processor_context();
 
        /*
-        * Restore segment registers.  This happens after restoring the GDT
-        * and LDT, which happen in fix_processor_context().
+        * Now that we have descriptor tables fully restored and working
+        * exception handling, restore the usermode segments.
         */
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+       loadsegment(ds, ctxt->es);
        loadsegment(es, ctxt->es);
        loadsegment(fs, ctxt->fs);
-       loadsegment(gs, ctxt->gs);
-       loadsegment(ss, ctxt->ss);
-
-       /*
-        * sysenter MSRs
-        */
-       if (boot_cpu_has(X86_FEATURE_SEP))
-               enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
-       asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
-       asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
-       asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
        load_gs_index(ctxt->gs);
-       asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
 
        /*
-        * Restore FSBASE and user GSBASE after reloading the respective
-        * segment selectors.
+        * Restore FSBASE and GSBASE after restoring the selectors, since
+        * restoring the selectors clobbers the bases.  Keep in mind
+        * that MSR_KERNEL_GS_BASE is horribly misnamed.
         */
        wrmsrl(MSR_FS_BASE, ctxt->fs_base);
-       wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+       wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+       loadsegment(gs, ctxt->gs);
 #endif
 
        do_fpu_end();
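
The reordering above follows the rule spelled out in the new comment: reloading a segment selector clobbers the associated base, so the base MSRs have to be written after the selectors. A toy model of that dependency, with a fake "CPU" that resets the cached base whenever a selector is loaded (purely illustrative, not the real suspend/resume code):

#include <assert.h>
#include <stdio.h>

struct fake_seg {
	unsigned short sel;
	unsigned long long base;
};

/* Loading a selector resets the cached base, as the hardware does. */
static void load_selector(struct fake_seg *s, unsigned short sel)
{
	s->sel = sel;
	s->base = 0;
}

static void write_base_msr(struct fake_seg *s, unsigned long long base)
{
	s->base = base;
}

int main(void)
{
	struct fake_seg fs = { 0, 0 };
	unsigned long long saved_base = 0x7f0000001000ull;
	unsigned short saved_sel = 0x33;

	/* Wrong order: the selector load wipes out the restored base. */
	write_base_msr(&fs, saved_base);
	load_selector(&fs, saved_sel);
	printf("base written first: base=%#llx (clobbered)\n", fs.base);

	/* Right order: restore the selector, then the base. */
	load_selector(&fs, saved_sel);
	write_base_msr(&fs, saved_base);
	assert(fs.base == saved_base);
	printf("base written last:  base=%#llx\n", fs.base);
	return 0;
}
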
index 6b830d4cb4c8e8e78c44dd87a12f642234533b4e..de58533d3664cdf067aedecfc81af2729c3b539b 100644 (file)
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
                return 0;
 
        if (reg == APIC_LVR)
-               return 0x10;
+               return 0x14;
 #ifdef CONFIG_X86_32
        if (reg == APIC_LDR)
                return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
index d669e9d890017770456abe458f1161eb2509c09e..c9081c6671f0b7a05ecfaaf206e7e1ed2b1f456a 100644 (file)
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
 
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+       struct xen_memory_map memmap;
+       int rc;
+       unsigned int i, last_guest_ram;
+       phys_addr_t max_addr = PFN_PHYS(max_pfn);
+       struct e820_table *xen_e820_table;
+       const struct e820_entry *entry;
+       struct resource *res;
+
+       if (!xen_initial_domain())
+               return;
+
+       xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+       if (!xen_e820_table)
+               return;
+
+       memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+       set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+       rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+       if (rc) {
+               pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+               goto out;
+       }
+
+       last_guest_ram = 0;
+       for (i = 0; i < memmap.nr_entries; i++) {
+               if (xen_e820_table->entries[i].addr >= max_addr)
+                       break;
+               if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+                       last_guest_ram = i;
+       }
+
+       entry = &xen_e820_table->entries[last_guest_ram];
+       if (max_addr >= entry->addr + entry->size)
+               goto out; /* No unallocated host RAM. */
+
+       hostmem_resource->start = max_addr;
+       hostmem_resource->end = entry->addr + entry->size;
+
+       /*
+        * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+        * as unavailable. The rest of that region can be used for hotplug-based
+        * ballooning.
+        */
+       for (; i < memmap.nr_entries; i++) {
+               entry = &xen_e820_table->entries[i];
+
+               if (entry->type == E820_TYPE_RAM)
+                       continue;
+
+               if (entry->addr >= hostmem_resource->end)
+                       break;
+
+               res = kzalloc(sizeof(*res), GFP_KERNEL);
+               if (!res)
+                       goto out;
+
+               res->name = "Unavailable host RAM";
+               res->start = entry->addr;
+               res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+                           entry->addr + entry->size : hostmem_resource->end;
+               rc = insert_resource(hostmem_resource, res);
+               if (rc) {
+                       pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+                               __func__, res->start, res->end, rc);
+                       kfree(res);
+                       goto  out;
+               }
+       }
+
+ out:
+       kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
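
arch_xen_balloon_init() clips each non-RAM host e820 entry to the host-memory window before inserting the "Unavailable host RAM" resources, and stops once an entry starts beyond the window. A self-contained sketch of that clipping loop with toy types (not the Xen or e820 interfaces):

#include <stdio.h>

enum { TYPE_RAM = 1, TYPE_RESERVED = 2 };

struct entry { unsigned long long addr, size; int type; };

int main(void)
{
	/* Host map above the guest's last RAM page. */
	const struct entry map[] = {
		{ 0x100000000ull, 0x10000000ull, TYPE_RAM },
		{ 0x110000000ull, 0x01000000ull, TYPE_RESERVED },
		{ 0x111000000ull, 0x20000000ull, TYPE_RAM },
	};
	unsigned long long win_end = 0x120000000ull;	/* window limit */

	for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
		const struct entry *e = &map[i];
		unsigned long long end = e->addr + e->size;

		if (e->type == TYPE_RAM)
			continue;		/* usable for ballooning */
		if (e->addr >= win_end)
			break;			/* past the window */

		/* Clip the hole to the window and mark it unavailable. */
		printf("unavailable: [%#llx-%#llx)\n",
		       e->addr, end < win_end ? end : win_end);
	}
	return 0;
}
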
index f2414c6c5e7c455b43fc45773fbd1264cf86c24e..c047f42552e1a61ed0a5787d904681974cc05af1 100644 (file)
@@ -88,6 +88,8 @@
 #include "multicalls.h"
 #include "pmu.h"
 
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
 void *xen_initial_gdt;
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
        mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
        /* Work out if we support NX */
+       get_cpu_cap(&boot_cpu_data);
        x86_configure_nx();
 
        /* Get mfn list */
index fc048ec686e7699b263254c79b482ccf935c21ef..4d62c071b166f65c848a12ca07bfe44ca20e198a 100644 (file)
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
        /* Graft it onto L4[511][510] */
        copy_page(level2_kernel_pgt, l2);
 
+       /*
+        * Zap execute permission from the ident map. Due to the sharing of
+        * L1 entries we need to do this in the L2.
+        */
+       if (__supported_pte_mask & _PAGE_NX) {
+               for (i = 0; i < PTRS_PER_PMD; ++i) {
+                       if (pmd_none(level2_ident_pgt[i]))
+                               continue;
+                       level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+               }
+       }
+
        /* Copy the initial P->M table mappings if necessary. */
        i = pgd_index(xen_start_info->mfn_list);
        if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
        switch (idx) {
        case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-       case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
        case FIX_WP_TEST:
 # ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
        case FIX_TEXT_POKE0:
        case FIX_TEXT_POKE1:
-       case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
                /* All local page mappings */
                pte = pfn_pte(phys, prot);
                break;
index c114ca767b3b8a382918e2b0160983fa257318db..6e0d2086eacbf37326467b5142e59750151a5328 100644 (file)
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
-               bool discard = false;
 
                chunk_size = size;
                type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
-                               discard = true;
+                               type = E820_TYPE_UNUSABLE;
                }
 
-               if (!discard)
-                       xen_align_and_add_e820_region(addr, chunk_size, type);
+               xen_align_and_add_e820_region(addr, chunk_size, type);
 
                addr += chunk_size;
                size -= chunk_size;
index 8bfdea58159ba9ffd972dd95717e0eee99101e0a..9ef6cf3addb38cae822d0e5c5ef18ba9e98cd2d7 100644 (file)
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
        bio->bi_disk = bio_src->bi_disk;
        bio->bi_partno = bio_src->bi_partno;
        bio_set_flag(bio, BIO_CLONED);
+       if (bio_flagged(bio_src, BIO_THROTTLED))
+               bio_set_flag(bio, BIO_THROTTLED);
        bio->bi_opf = bio_src->bi_opf;
        bio->bi_write_hint = bio_src->bi_write_hint;
        bio->bi_iter = bio_src->bi_iter;
index b21f8e86f1207f9b76bf3e2083fcf72b5062f0b7..d3a94719f03fb2af81d6270d6fc9ed58f0dde373 100644 (file)
 #include "blk.h"
 
 /*
- * Append a bio to a passthrough request.  Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request.  Only works if the bio can be merged
+ * into the request based on the driver constraints.
  */
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
 {
-       blk_queue_bounce(rq->q, &bio);
+       struct bio *orig_bio = *bio;
+
+       blk_queue_bounce(rq->q, bio);
 
        if (!rq->bio) {
-               blk_rq_bio_prep(rq->q, rq, bio);
+               blk_rq_bio_prep(rq->q, rq, *bio);
        } else {
-               if (!ll_back_merge_fn(rq->q, rq, bio))
+               if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+                       if (orig_bio != *bio) {
+                               bio_put(*bio);
+                               *bio = orig_bio;
+                       }
                        return -EINVAL;
+               }
 
-               rq->biotail->bi_next = bio;
-               rq->biotail = bio;
-               rq->__data_len += bio->bi_iter.bi_size;
+               rq->biotail->bi_next = *bio;
+               rq->biotail = *bio;
+               rq->__data_len += (*bio)->bi_iter.bi_size;
        }
 
        return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
         * We link the bounce buffer in and could have to traverse it
         * later so we have to get a ref to prevent it from being freed
         */
-       ret = blk_rq_append_bio(rq, bio);
-       bio_get(bio);
+       ret = blk_rq_append_bio(rq, &bio);
        if (ret) {
-               bio_endio(bio);
                __blk_rq_unmap_user(orig_bio);
-               bio_put(bio);
                return ret;
        }
+       bio_get(bio);
 
        return 0;
 }
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        int reading = rq_data_dir(rq) == READ;
        unsigned long addr = (unsigned long) kbuf;
        int do_copy = 0;
-       struct bio *bio;
+       struct bio *bio, *orig_bio;
        int ret;
 
        if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        if (do_copy)
                rq->rq_flags |= RQF_COPY_USER;
 
-       ret = blk_rq_append_bio(rq, bio);
+       orig_bio = bio;
+       ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                /* request is too big */
-               bio_put(bio);
+               bio_put(orig_bio);
                return ret;
        }
 
index 825bc29767e6699ac85675d319a9866b70cc9b84..d19f416d61012ac032c49608f0afe463c948e8bc 100644 (file)
@@ -2226,13 +2226,7 @@ again:
 out_unlock:
        spin_unlock_irq(q->queue_lock);
 out:
-       /*
-        * As multiple blk-throtls may stack in the same issue path, we
-        * don't want bios to leave with the flag set.  Clear the flag if
-        * being issued.
-        */
-       if (!throttled)
-               bio_clear_flag(bio, BIO_THROTTLED);
+       bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
index fceb1a96480bfb9600e4664fa2b4992c8bb64210..1d05c422c932ad56d705f94deed6cce0891ff9d3 100644 (file)
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        unsigned i = 0;
        bool bounce = false;
        int sectors = 0;
+       bool passthrough = bio_is_passthrough(*bio_orig);
 
        bio_for_each_segment(from, *bio_orig, iter) {
                if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        if (!bounce)
                return;
 
-       if (sectors < bio_sectors(*bio_orig)) {
+       if (!passthrough && sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
                bio_chain(bio, *bio_orig);
                generic_make_request(*bio_orig);
                *bio_orig = bio;
        }
-       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+                       bounce_bio_set);
 
        bio_for_each_segment_all(to, bio, i) {
                struct page *page = to->bv_page;
index b4df317c291692f01138b91608dc6c80f71bb9aa..f95c60774ce8ca613417d3ccf54bee52010752ee 100644 (file)
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
        unsigned int cur_domain;
        unsigned int batching;
        wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+       struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
        atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+                            void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
        unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                INIT_LIST_HEAD(&khd->rqs[i]);
+               init_waitqueue_func_entry(&khd->domain_wait[i],
+                                         kyber_domain_wake);
+               khd->domain_wait[i].private = hctx;
                INIT_LIST_HEAD(&khd->domain_wait[i].entry);
                atomic_set(&khd->wait_index[i], 0);
        }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
        int nr;
 
        nr = __sbitmap_queue_get(domain_tokens);
-       if (nr >= 0)
-               return nr;
 
        /*
         * If we failed to get a domain token, make sure the hardware queue is
         * run when one becomes available. Note that this is serialized on
         * khd->lock, but we still need to be careful about the waker.
         */
-       if (list_empty_careful(&wait->entry)) {
-               init_waitqueue_func_entry(wait, kyber_domain_wake);
-               wait->private = hctx;
+       if (nr < 0 && list_empty_careful(&wait->entry)) {
                ws = sbq_wait_ptr(domain_tokens,
                                  &khd->wait_index[sched_domain]);
+               khd->domain_ws[sched_domain] = ws;
                add_wait_queue(&ws->wait, wait);
 
                /*
                 * Try again in case a token was freed before we got on the wait
-                * queue. The waker may have already removed the entry from the
-                * wait queue, but list_del_init() is okay with that.
+                * queue.
                 */
                nr = __sbitmap_queue_get(domain_tokens);
-               if (nr >= 0) {
-                       unsigned long flags;
+       }
 
-                       spin_lock_irqsave(&ws->wait.lock, flags);
-                       list_del_init(&wait->entry);
-                       spin_unlock_irqrestore(&ws->wait.lock, flags);
-               }
+       /*
+        * If we got a token while we were on the wait queue, remove ourselves
+        * from the wait queue to ensure that all wake ups make forward
+        * progress. It's possible that the waker already deleted the entry
+        * between the !list_empty_careful() check and us grabbing the lock, but
+        * list_del_init() is okay with that.
+        */
+       if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+               ws = khd->domain_ws[sched_domain];
+               spin_lock_irq(&ws->wait.lock);
+               list_del_init(&wait->entry);
+               spin_unlock_irq(&ws->wait.lock);
        }
+
        return nr;
 }
 
index 358749c38894e31481fb0d903c0d1f7504311aa8..444a387df219e96a35fb7972f2a1a810e013a60b 100644 (file)
@@ -672,14 +672,15 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
        }
 
        tsgl = areq->tsgl;
-       for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
-               if (!sg_page(sg))
-                       continue;
-               put_page(sg_page(sg));
-       }
+       if (tsgl) {
+               for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+                       if (!sg_page(sg))
+                               continue;
+                       put_page(sg_page(sg));
+               }
 
-       if (areq->tsgl && areq->tsgl_entries)
                sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+       }
 }
 EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
 
@@ -1137,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                if (!af_alg_readable(sk))
                        break;
 
-               if (!ctx->used) {
-                       err = af_alg_wait_for_data(sk, flags);
-                       if (err)
-                               return err;
-               }
-
                seglen = min_t(size_t, (maxsize - len),
                               msg_data_left(msg));
 
index 805f485ddf1be4711a9d2ec47998964543f1d217..ddcc45f77edd367bf118e46aa757891c5c3d8869 100644 (file)
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
        size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
        size_t processed = 0;           /* [in]  TX bufs to be consumed */
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /*
         * Data length provided by caller via sendmsg/sendpage that has not
         * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = outlen;
+
                aead_request_set_callback(&areq->cra_u.aead_req,
                                          CRYPTO_TFM_REQ_MAY_BACKLOG,
                                          af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                                 crypto_aead_decrypt(&areq->cra_u.aead_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = outlen;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
@@ -503,6 +509,7 @@ static void aead_release(void *private)
        struct aead_tfm *tfm = private;
 
        crypto_free_aead(tfm->aead);
+       crypto_put_default_null_skcipher2();
        kfree(tfm);
 }
 
@@ -535,7 +542,6 @@ static void aead_sock_destruct(struct sock *sk)
        unsigned int ivlen = crypto_aead_ivsize(tfm);
 
        af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
-       crypto_put_default_null_skcipher2();
        sock_kzfree_s(sk, ctx->iv, ivlen);
        sock_kfree_s(sk, ctx, ctx->len);
        af_alg_release_parent(sk);
index 30cff827dd8fff048fa3e2ca7de770ab73022749..baef9bfccddaa94728bea5933bea16c32b2a32b5 100644 (file)
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
        int err = 0;
        size_t len = 0;
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /* Allocate cipher request for current operation. */
        areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
                                     crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = len;
+
                skcipher_request_set_callback(&areq->cra_u.skcipher_req,
                                              CRYPTO_TFM_REQ_MAY_SLEEP,
                                              af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                        crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = len;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
index 92871dc2a63ec66ca628df3a44b025b7ef6f247e..e74730224f0a5f6346bb8ae7b80f3ed5e6cb6281 100644 (file)
@@ -195,11 +195,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        salg = shash_attr_alg(tb[1], 0, 0);
        if (IS_ERR(salg))
                return PTR_ERR(salg);
+       alg = &salg->base;
 
+       /* The underlying hash algorithm must be unkeyed */
        err = -EINVAL;
+       if (crypto_shash_alg_has_setkey(salg))
+               goto out_put_alg;
+
        ds = salg->digestsize;
        ss = salg->statesize;
-       alg = &salg->base;
        if (ds > alg->cra_blocksize ||
            ss < alg->cra_blocksize)
                goto out_put_alg;
index 4e64726588524f137acd590809bef11673695ed2..eca04d3729b37c696c2dac4b0ac472422f30615d 100644 (file)
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
                pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
                crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
                INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+               spin_lock_init(&cpu_queue->q_lock);
        }
        return 0;
 }
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
        int cpu, err;
        struct mcryptd_cpu_queue *cpu_queue;
 
-       cpu = get_cpu();
-       cpu_queue = this_cpu_ptr(queue->cpu_queue);
-       rctx->tag.cpu = cpu;
+       cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+       spin_lock(&cpu_queue->q_lock);
+       cpu = smp_processor_id();
+       rctx->tag.cpu = smp_processor_id();
 
        err = crypto_enqueue_request(&cpu_queue->queue, request);
        pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
                 cpu, cpu_queue, request);
+       spin_unlock(&cpu_queue->q_lock);
        queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-       put_cpu();
 
        return err;
 }
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
        cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
        i = 0;
        while (i < MCRYPTD_BATCH || single_task_running()) {
-               /*
-                * preempt_disable/enable is used to prevent
-                * being preempted by mcryptd_enqueue_request()
-                */
-               local_bh_disable();
-               preempt_disable();
+
+               spin_lock_bh(&cpu_queue->q_lock);
                backlog = crypto_get_backlog(&cpu_queue->queue);
                req = crypto_dequeue_request(&cpu_queue->queue);
-               preempt_enable();
-               local_bh_enable();
+               spin_unlock_bh(&cpu_queue->q_lock);
 
                if (!req) {
                        mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
                ++i;
        }
        if (cpu_queue->queue.qlen)
-               queue_work(kcrypto_wq, &cpu_queue->work);
+               queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
 }
 
 void mcryptd_flusher(struct work_struct *__work)
index 0b66dc8246068aa084dd0b44210b04dee5f2bccb..cad395d70d78e18527866bf1a3f6452c338e4c7c 100644 (file)
@@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
                return -EINVAL;
 
        if (fips_enabled) {
-               while (!*ptr && n_sz) {
+               while (n_sz && !*ptr) {
                        ptr++;
                        n_sz--;
                }
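
The swapped condition makes the length check happen before the dereference, so the leading-zero strip cannot read one byte past an all-zero buffer; && short-circuits left to right. The same idiom in standalone form:

#include <stdio.h>

/* Strip leading zero bytes without reading past the end of the buffer. */
static const unsigned char *skip_leading_zeros(const unsigned char *p,
					       size_t *len)
{
	/* Length first, then dereference: safe even when *len hits 0. */
	while (*len && !*p) {
		p++;
		(*len)--;
	}
	return p;
}

int main(void)
{
	unsigned char n[] = { 0x00, 0x00, 0x00, 0x00 };	/* all zeros */
	size_t len = sizeof(n);
	const unsigned char *p = skip_leading_zeros(n, &len);

	printf("remaining length: %zu\n", len);	/* 0, and no overread */
	(void)p;
	return 0;
}
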
index f550b5d9463074b16670129341de59e069f8509c..d7da0eea5622af96f63300ad45c35450951551e1 100644 (file)
@@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
        salsa20_ivsetup(ctx, walk.iv);
 
-       if (likely(walk.nbytes == nbytes))
-       {
-               salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-                                     walk.src.virt.addr, nbytes);
-               return blkcipher_walk_done(desc, &walk, 0);
-       }
-
        while (walk.nbytes >= 64) {
                salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
                                      walk.src.virt.addr,
index 325a14da58278f01b8c1ffd92bdd8990db2860c4..e849d3ee2e2728d346df1f21f6a8d4db57fc42c5 100644 (file)
 
 static const struct crypto_type crypto_shash_type;
 
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
-                          unsigned int keylen)
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+                   unsigned int keylen)
 {
        return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(shash_no_setkey);
 
 static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
                                  unsigned int keylen)
index 778e0ff42bfa801eda5be848da9e6747ebbc2626..11af5fd6a443570550e1dac5b0a429b2cae801b1 100644 (file)
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 
        walk->total = req->cryptlen;
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
        scatterwalk_start(&walk->in, req->src);
        scatterwalk_start(&walk->out, req->dst);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        walk->flags &= ~SKCIPHER_WALK_SLEEP;
        walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
                       SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        int err;
 
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        scatterwalk_done(&walk->in, 0, walk->total);
        scatterwalk_done(&walk->out, 0, walk->total);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
                walk->flags |= SKCIPHER_WALK_SLEEP;
        else
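
Moving the walk->iv/walk->oiv assignments ahead of the early "empty request" return means later paths that read those fields never see uninitialized stack memory. A small sketch of the same rule, with toy structures standing in for the walk and request types: initialize the fields that callers read unconditionally before any early return.

#include <stdio.h>
#include <string.h>

struct walk { const char *iv; size_t total; };
struct req  { const char *iv; size_t len; };

static int walk_init(struct walk *w, const struct req *r)
{
	w->total = r->len;
	w->iv = r->iv;		/* set before any early return */

	if (!w->total)
		return 0;	/* callers may still look at w->iv */

	/* ... set up the rest of the walk state here ... */
	return 0;
}

int main(void)
{
	struct req r = { "0123456789abcdef", 0 };	/* empty payload */
	struct walk w;

	memset(&w, 0xa5, sizeof(w));	/* simulate stack garbage */
	walk_init(&w, &r);
	printf("iv is %s\n", w.iv ? "valid" : "garbage");
	return 0;
}
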
index 6742f6c68034c5e833505d294902dd97c274c1b0..9bff853e85f37831d8d053a2aa363f139537c9b5 100644 (file)
@@ -1007,7 +1007,7 @@ skip:
        /* The record may be cleared by others, try read next record */
        if (len == -ENOENT)
                goto skip;
-       else if (len < sizeof(*rcd)) {
+       else if (len < 0 || len < sizeof(*rcd)) {
                rc = -EIO;
                goto out;
        }
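
The extra "len < 0" test matters because sizeof() yields a size_t: in the old check a negative length was converted to a huge unsigned value for the comparison and slipped past it. A short demonstration of the pitfall (toy record type):

#include <stdio.h>
#include <sys/types.h>

struct rcd { char hdr[64]; };

int main(void)
{
	ssize_t len = -5;	/* e.g. an error code from a lower layer */

	/* Unsigned comparison: -5 converts to a huge value, so this is false
	 * (compilers typically warn about the sign mismatch here). */
	if (len < sizeof(struct rcd))
		printf("short record (never printed for negative len)\n");

	/* Check the sign explicitly before the unsigned comparison. */
	if (len < 0 || (size_t)len < sizeof(struct rcd))
		printf("rejected: len=%zd\n", len);
	return 0;
}
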
index 30e84cc600ae6438c25aec2f2975ae4e3f144553..06ea4749ebd9826a3d7b8b0a9798a1cc797f4d61 100644 (file)
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
        struct cpc_register_resource *desired_reg;
        int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-       struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+       struct cppc_pcc_data *pcc_ss_data;
        int ret = 0;
 
        if (!cpc_desc || pcc_ss_id < 0) {
index e4ffaeec9ec204110f7e50c5cff49172a486f081..a4c8ad98560dc4a3a4e21bd1bbb825bd73d237b7 100644 (file)
@@ -1138,7 +1138,7 @@ int acpi_subsys_thaw_noirq(struct device *dev)
         * skip all of the subsequent "thaw" callbacks for the device.
         */
        if (dev_pm_smart_suspend_and_suspended(dev)) {
-               dev->power.direct_complete = true;
+               dev_pm_skip_next_resume_phases(dev);
                return 0;
        }
 
index ff2580e7611d18c6d56c58d50c2cbc3a2d54aa36..abeb4df4f22e43d7f0d1398af9962135a37af4b6 100644 (file)
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                                dev_name(&adev_dimm->dev));
                return -ENXIO;
        }
+       /*
+        * Record nfit_mem for the notification path to track back to
+        * the nfit sysfs attributes for this dimm device object.
+        */
+       dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
        /*
         * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
                        sysfs_put(nfit_mem->flags_attr);
                        nfit_mem->flags_attr = NULL;
                }
-               if (adev_dimm)
+               if (adev_dimm) {
                        acpi_remove_notify_handler(adev_dimm->handle,
                                        ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+                       dev_set_drvdata(&adev_dimm->dev, NULL);
+               }
        }
        mutex_unlock(&acpi_desc->init_mutex);
 }
index 80854f71559a319fc8fb0ca4b0dd15eb7aec6460..0ae6971c2a4cfcd3bba93856335f4a031137385c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * MeidaTek AHCI SATA driver
+ * MediaTek AHCI SATA driver
  *
  * Copyright (c) 2017 MediaTek Inc.
  * Author: Ryder Lee <ryder.lee@mediatek.com>
@@ -25,7 +25,7 @@
 #include <linux/reset.h>
 #include "ahci.h"
 
-#define DRV_NAME               "ahci"
+#define DRV_NAME               "ahci-mtk"
 
 #define SYS_CFG                        0x14
 #define SYS_CFG_SATA_MSK       GENMASK(31, 30)
@@ -192,5 +192,5 @@ static struct platform_driver mtk_ahci_driver = {
 };
 module_platform_driver(mtk_ahci_driver);
 
-MODULE_DESCRIPTION("MeidaTek SATA AHCI Driver");
+MODULE_DESCRIPTION("MediaTek SATA AHCI Driver");
 MODULE_LICENSE("GPL v2");
index b6b0bf76dfc7bb7fe90f45418aab974bf73b6f87..2685f28160f70764ee4013930239566031c9b058 100644 (file)
@@ -35,6 +35,8 @@
 
 /* port register default value */
 #define AHCI_PORT_PHY_1_CFG    0xa003fffe
+#define AHCI_PORT_PHY2_CFG     0x28184d1f
+#define AHCI_PORT_PHY3_CFG     0x0e081509
 #define AHCI_PORT_TRANS_CFG    0x08000029
 #define AHCI_PORT_AXICC_CFG    0x3fffffff
 
@@ -183,6 +185,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
                writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
                                qpriv->ecc_addr);
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+               writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+               writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
                        writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -190,6 +194,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 
        case AHCI_LS2080A:
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+               writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+               writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
                        writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -201,6 +207,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
                writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
                                qpriv->ecc_addr);
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+               writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+               writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
                        writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -212,6 +220,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
                writel(readl(qpriv->ecc_addr) | ECC_DIS_LS1088A,
                       qpriv->ecc_addr);
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+               writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+               writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
                        writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
@@ -219,6 +229,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
 
        case AHCI_LS2088A:
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+               writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2);
+               writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
                        writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
index 2a882929de4aa3cfcce6b46de459ee7f7360a075..8193b38a1cae7a8d738fb4c29828654dd02f0572 100644 (file)
@@ -3082,13 +3082,19 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit)
        bit = fls(mask) - 1;
        mask &= ~(1 << bit);
 
-       /* Mask off all speeds higher than or equal to the current
-        * one.  Force 1.5Gbps if current SPD is not available.
+       /*
+        * Mask off all speeds higher than or equal to the current one.  At
+        * this point, if current SPD is not available and we previously
+        * recorded the link speed from SStatus, the driver has already
+        * masked off the highest bit so mask should already be 1 or 0.
+        * Otherwise, we should not force 1.5Gbps on a link where we have
+        * not previously recorded speed from SStatus.  Just return in this
+        * case.
         */
        if (spd > 1)
                mask &= (1 << (spd - 1)) - 1;
        else
-               mask &= 1;
+               return -EINVAL;
 
        /* were we already at the bottom? */
        if (!mask)
index ffd8d33c6e0f044dece4e4bcc4560aebd68290ac..6db2e34bd52f2e5521e72ac2b17e1ee1fe43cc44 100644 (file)
@@ -82,7 +82,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed
  * is issued to the device. However, if the controller clock is 133MHz,
  * the following tables must be used.
  */
-static struct pdc2027x_pio_timing {
+static const struct pdc2027x_pio_timing {
        u8 value0, value1, value2;
 } pdc2027x_pio_timing_tbl[] = {
        { 0xfb, 0x2b, 0xac }, /* PIO mode 0 */
@@ -92,7 +92,7 @@ static struct pdc2027x_pio_timing {
        { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */
 };
 
-static struct pdc2027x_mdma_timing {
+static const struct pdc2027x_mdma_timing {
        u8 value0, value1;
 } pdc2027x_mdma_timing_tbl[] = {
        { 0xdf, 0x5f }, /* MDMA mode 0 */
@@ -100,7 +100,7 @@ static struct pdc2027x_mdma_timing {
        { 0x69, 0x25 }, /* MDMA mode 2 */
 };
 
-static struct pdc2027x_udma_timing {
+static const struct pdc2027x_udma_timing {
        u8 value0, value1, value2;
 } pdc2027x_udma_timing_tbl[] = {
        { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */
@@ -649,7 +649,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host)
  * @host: target ATA host
  * @board_idx: board identifier
  */
-static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
+static void pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
 {
        long pll_clock;
 
@@ -665,8 +665,6 @@ static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx)
 
        /* Adjust PLL control register */
        pdc_adjust_pll(host, pll_clock, board_idx);
-
-       return 0;
 }
 
 /**
@@ -753,8 +751,7 @@ static int pdc2027x_init_one(struct pci_dev *pdev,
        //pci_enable_intx(pdev);
 
        /* initialize adapter */
-       if (pdc_hardware_init(host, board_idx) != 0)
-               return -EIO;
+       pdc_hardware_init(host, board_idx);
 
        pci_set_master(pdev);
        return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt,
@@ -778,8 +775,7 @@ static int pdc2027x_reinit_one(struct pci_dev *pdev)
        else
                board_idx = PDC_UDMA_133;
 
-       if (pdc_hardware_init(host, board_idx))
-               return -EIO;
+       pdc_hardware_init(host, board_idx);
 
        ata_host_resume(host);
        return 0;
index db2f044159274a35457f4f1e5b3a55d226f08cd2..08744b572af6a25184d274ba91304e19ec2be732 100644 (file)
@@ -525,6 +525,21 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd)
 
 /*------------------------- Resume routines -------------------------*/
 
+/**
+ * dev_pm_skip_next_resume_phases - Skip next system resume phases for device.
+ * @dev: Target device.
+ *
+ * Make the core skip the "early resume" and "resume" phases for @dev.
+ *
+ * This function can be called by middle-layer code during the "noirq" phase of
+ * system resume if necessary, but not by device drivers.
+ */
+void dev_pm_skip_next_resume_phases(struct device *dev)
+{
+       dev->power.is_late_suspended = false;
+       dev->power.is_suspended = false;
+}
+
 /**
  * device_resume_noirq - Execute a "noirq resume" callback for given device.
  * @dev: Device to handle.
index ccb9975a97fa3f214d658776450ab618bae26643..ad0477ae820f040affe54f4368d3a02d9da63350 100644 (file)
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
-       call_single_data_t csd;
+       struct __call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
+       blk_status_t error;
        struct nullb_queue *nq;
        struct hrtimer timer;
-       blk_status_t error;
 };
 
 struct nullb_queue {
index 779869ed32b1516261e80fffd440b3ca1e1132ea..71fad747c0c7c1052cc19ee3bad0568b4a80c55b 100644 (file)
@@ -199,6 +199,9 @@ struct smi_info {
        /* The timer for this si. */
        struct timer_list   si_timer;
 
+       /* This flag is set, if the timer can be set */
+       bool                timer_can_start;
+
        /* This flag is set, if the timer is running (timer_pending() isn't enough) */
        bool                timer_running;
 
@@ -355,6 +358,8 @@ out:
 
 static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
 {
+       if (!smi_info->timer_can_start)
+               return;
        smi_info->last_timeout_jiffies = jiffies;
        mod_timer(&smi_info->si_timer, new_val);
        smi_info->timer_running = true;
@@ -374,21 +379,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
        smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
 }
 
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
 {
        unsigned char msg[2];
 
        msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
        msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-       if (start_timer)
-               start_new_msg(smi_info, msg, 2);
-       else
-               smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+       start_new_msg(smi_info, msg, 2);
        smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
 {
        unsigned char msg[3];
 
@@ -397,10 +399,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
        msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
        msg[2] = WDT_PRE_TIMEOUT_INT;
 
-       if (start_timer)
-               start_new_msg(smi_info, msg, 3);
-       else
-               smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+       start_new_msg(smi_info, msg, 3);
        smi_info->si_state = SI_CLEARING_FLAGS;
 }
 
@@ -435,11 +434,11 @@ static void start_getting_events(struct smi_info *smi_info)
  * Note that we cannot just use disable_irq(), since the interrupt may
  * be shared.
  */
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
 {
        if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) {
                smi_info->interrupt_disabled = true;
-               start_check_enables(smi_info, start_timer);
+               start_check_enables(smi_info);
                return true;
        }
        return false;
@@ -449,7 +448,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
 {
        if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) {
                smi_info->interrupt_disabled = false;
-               start_check_enables(smi_info, true);
+               start_check_enables(smi_info);
                return true;
        }
        return false;
@@ -467,7 +466,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
 
        msg = ipmi_alloc_smi_msg();
        if (!msg) {
-               if (!disable_si_irq(smi_info, true))
+               if (!disable_si_irq(smi_info))
                        smi_info->si_state = SI_NORMAL;
        } else if (enable_si_irq(smi_info)) {
                ipmi_free_smi_msg(msg);
@@ -483,7 +482,7 @@ retry:
                /* Watchdog pre-timeout */
                smi_inc_stat(smi_info, watchdog_pretimeouts);
 
-               start_clear_flags(smi_info, true);
+               start_clear_flags(smi_info);
                smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
                if (smi_info->intf)
                        ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -866,7 +865,7 @@ restart:
                 * disable and messages disabled.
                 */
                if (smi_info->supports_event_msg_buff || smi_info->io.irq) {
-                       start_check_enables(smi_info, true);
+                       start_check_enables(smi_info);
                } else {
                        smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
                        if (!smi_info->curr_msg)
@@ -1167,6 +1166,7 @@ static int smi_start_processing(void       *send_info,
 
        /* Set up the timer that drives the interface. */
        timer_setup(&new_smi->si_timer, smi_timeout, 0);
+       new_smi->timer_can_start = true;
        smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
 
        /* Try to claim any interrupts. */
@@ -1936,10 +1936,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
        check_set_rcv_irq(smi_info);
 }
 
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
 {
        if (smi_info->thread != NULL)
                kthread_stop(smi_info->thread);
+
+       smi_info->timer_can_start = false;
        if (smi_info->timer_running)
                del_timer_sync(&smi_info->si_timer);
 }
@@ -2152,7 +2154,7 @@ static int try_smi_init(struct smi_info *new_smi)
         * Start clearing the flags before we enable interrupts or the
         * timer to avoid racing with the timer.
         */
-       start_clear_flags(new_smi, false);
+       start_clear_flags(new_smi);
 
        /*
         * IRQ is defined to be set when non-zero.  req_events will
@@ -2238,7 +2240,7 @@ out_err_remove_attrs:
        dev_set_drvdata(new_smi->io.dev, NULL);
 
 out_err_stop_timer:
-       wait_for_timer_and_thread(new_smi);
+       stop_timer_and_thread(new_smi);
 
 out_err:
        new_smi->interrupt_disabled = true;
@@ -2388,7 +2390,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
         */
        if (to_clean->io.irq_cleanup)
                to_clean->io.irq_cleanup(&to_clean->io);
-       wait_for_timer_and_thread(to_clean);
+       stop_timer_and_thread(to_clean);
 
        /*
         * Timeouts are stopped, now make sure the interrupts are off
@@ -2400,7 +2402,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
                schedule_timeout_uninterruptible(1);
        }
        if (to_clean->handlers)
-               disable_si_irq(to_clean, false);
+               disable_si_irq(to_clean);
        while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
                poll(to_clean);
                schedule_timeout_uninterruptible(1);
index 090b073ab441961f5c530dd9f453f1f1290e5a08..6b10f0e18a95d7f846edd78d22e392ec0e10cc69 100644 (file)
@@ -10,6 +10,8 @@ static int __init ipmi_parisc_probe(struct parisc_device *dev)
 {
        struct si_sm_io io;
 
+       memset(&io, 0, sizeof(io));
+
        io.si_type      = SI_KCS;
        io.addr_source  = SI_DEVICETREE;
        io.addr_type    = IPMI_MEM_ADDR_SPACE;
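
The added memset zeroes the whole on-stack struct si_sm_io, so members this probe never assigns do not carry stack garbage into the registration path. The general pattern, with an illustrative config struct rather than the real IPMI types:

#include <stdio.h>
#include <string.h>

struct si_cfg {
	int type;
	unsigned long addr;
	int slave_addr;		/* never set by this caller */
	void (*setup)(void);	/* never set by this caller */
};

static void probe(void)
{
	struct si_cfg io;

	/* Zero everything first so unassigned members are well defined. */
	memset(&io, 0, sizeof(io));

	io.type = 1;
	io.addr = 0xca2;

	printf("slave_addr=%d setup=%p\n", io.slave_addr, (void *)io.setup);
}

int main(void)
{
	probe();
	return 0;
}
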
index 99771f5cad07a7b25285f33cf0073739ca27e4ae..27dd11c49d2197aa098426ca2992565f75e45a32 100644 (file)
@@ -103,10 +103,13 @@ static int ipmi_pci_probe(struct pci_dev *pdev,
        io.addr_source_cleanup = ipmi_pci_cleanup;
        io.addr_source_data = pdev;
 
-       if (pci_resource_flags(pdev, 0) & IORESOURCE_IO)
+       if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) {
                io.addr_type = IPMI_IO_ADDR_SPACE;
-       else
+               io.io_setup = ipmi_si_port_setup;
+       } else {
                io.addr_type = IPMI_MEM_ADDR_SPACE;
+               io.io_setup = ipmi_si_mem_setup;
+       }
        io.addr_data = pci_resource_start(pdev, 0);
 
        io.regspacing = ipmi_pci_probe_regspacing(&io);
index 647d056df88c8dd2a7d8288e35fa2eeba9b7705b..8a1860a36c778aba3b66de996a6cc7ee878cf97e 100644 (file)
@@ -1564,6 +1564,9 @@ static void clk_change_rate(struct clk_core *core)
                best_parent_rate = core->parent->rate;
        }
 
+       if (clk_pm_runtime_get(core))
+               return;
+
        if (core->flags & CLK_SET_RATE_UNGATE) {
                unsigned long flags;
 
@@ -1634,6 +1637,8 @@ static void clk_change_rate(struct clk_core *core)
        /* handle the new child who might not be in core->children yet */
        if (core->new_child)
                clk_change_rate(core->new_child);
+
+       clk_pm_runtime_put(core);
 }
 
 static int clk_core_set_rate_nolock(struct clk_core *core,
index a1a634253d6f2299bfad888b2fa193c98b4ac019..f00d8758ba24f6e5ed537a88be76ff85e5a7c5e4 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
        return 0;
 }
 
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+                                unsigned long id)
+{
+       sun9i_mmc_reset_assert(rcdev, id);
+       udelay(10);
+       sun9i_mmc_reset_deassert(rcdev, id);
+
+       return 0;
+}
+
 static const struct reset_control_ops sun9i_mmc_reset_ops = {
        .assert         = sun9i_mmc_reset_assert,
        .deassert       = sun9i_mmc_reset_deassert,
+       .reset          = sun9i_mmc_reset_reset,
 };
 
 static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
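
Adding a .reset callback gives consumers that ask the reset framework for a full reset cycle an assert/settle/deassert sequence built from the two existing callbacks. A hedged sketch of that composition with plain function pointers (not the real reset_controller_dev types; usleep stands in for udelay):

#include <stdio.h>
#include <unistd.h>

struct reset_ops {
	int (*assert_rst)(unsigned long id);
	int (*deassert_rst)(unsigned long id);
	int (*reset)(unsigned long id);
};

static int my_assert(unsigned long id)
{
	printf("assert reset line %lu\n", id);
	return 0;
}

static int my_deassert(unsigned long id)
{
	printf("deassert reset line %lu\n", id);
	return 0;
}

/* Full reset cycle: assert, let the line settle, then deassert. */
static int my_reset(unsigned long id)
{
	my_assert(id);
	usleep(10);
	return my_deassert(id);
}

static const struct reset_ops ops = {
	.assert_rst   = my_assert,
	.deassert_rst = my_deassert,
	.reset        = my_reset,
};

int main(void)
{
	return ops.reset(0);
}
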
index 58d4f4e1ad6a907991873a03027e6c7aa2f31fc4..ca38229b045ab288a2f250dddaf1b174e8c0572f 100644 (file)
@@ -22,6 +22,8 @@
 
 #include "cpufreq_governor.h"
 
+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL      (2 * TICK_NSEC / NSEC_PER_USEC)
+
 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
 
 static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 {
        struct dbs_data *dbs_data = to_dbs_data(attr_set);
        struct policy_dbs_info *policy_dbs;
+       unsigned int sampling_interval;
        int ret;
-       ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
-       if (ret != 1)
+
+       ret = sscanf(buf, "%u", &sampling_interval);
+       if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
                return -EINVAL;
 
+       dbs_data->sampling_rate = sampling_interval;
+
        /*
         * We are operating under dbs_data->mutex and so the list and its
         * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        if (ret)
                goto free_policy_dbs_info;
 
-       dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
+       /*
+        * The sampling interval should not be less than the transition latency
+        * of the CPU and it also cannot be too small for dbs_update() to work
+        * correctly.
+        */
+       dbs_data->sampling_rate = max_t(unsigned int,
+                                       CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
+                                       cpufreq_policy_transition_delay_us(policy));
 
        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;
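
The store path now parses into a local variable and rejects values below the new minimum before touching the shared setting, and the init path clamps the transition delay to the same floor with max_t. The validate-then-commit shape of that change in plain C, with an illustrative floor value:

#include <stdio.h>

#define MIN_SAMPLING_INTERVAL_US 2000u	/* illustrative floor */

static unsigned int sampling_rate = 10000;

/* Parse and validate into a local before touching the shared setting. */
static int store_sampling_rate(const char *buf)
{
	unsigned int val;

	if (sscanf(buf, "%u", &val) != 1 || val < MIN_SAMPLING_INTERVAL_US)
		return -1;

	sampling_rate = val;
	return 0;
}

static unsigned int init_sampling_rate(unsigned int transition_delay_us)
{
	/* Never go below the floor, even on very fast hardware. */
	return transition_delay_us > MIN_SAMPLING_INTERVAL_US ?
	       transition_delay_us : MIN_SAMPLING_INTERVAL_US;
}

int main(void)
{
	printf("store \"500\"   -> %d (rejected)\n", store_sampling_rate("500"));
	printf("store \"20000\" -> %d, rate=%u\n",
	       store_sampling_rate("20000"), sampling_rate);
	printf("init with 300us delay -> %u\n", init_sampling_rate(300));
	return 0;
}
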
index 628fe899cb483da9dbf0f7661b537734bc82f784..d9b2c2de49c43f125c91b382f818ff81d0ffc6ac 100644 (file)
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
        val >>= OCOTP_CFG3_SPEED_SHIFT;
        val &= 0x3;
 
-       if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
-            of_machine_is_compatible("fsl,imx6q"))
-               if (dev_pm_opp_disable(dev, 1200000000))
-                       dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        if (val < OCOTP_CFG3_SPEED_996MHZ)
                if (dev_pm_opp_disable(dev, 996000000))
                        dev_warn(dev, "failed to disable 996MHz OPP\n");
-       if (of_machine_is_compatible("fsl,imx6q")) {
+
+       if (of_machine_is_compatible("fsl,imx6q") ||
+           of_machine_is_compatible("fsl,imx6qp")) {
                if (val != OCOTP_CFG3_SPEED_852MHZ)
                        if (dev_pm_opp_disable(dev, 852000000))
                                dev_warn(dev, "failed to disable 852MHz OPP\n");
+               if (val != OCOTP_CFG3_SPEED_1P2GHZ)
+                       if (dev_pm_opp_disable(dev, 1200000000))
+                               dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        }
        iounmap(base);
 put_node:
index fbab271b3bf9f9506c86579c75ebe32fc3235228..a861b5b4d4437d6b3be7dcf5e9d0b3475205455d 100644 (file)
@@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
                         unsigned long flags)
 {
        struct at_dma_chan      *atchan = to_at_dma_chan(chan);
-       struct data_chunk       *first = xt->sgl;
+       struct data_chunk       *first;
        struct at_desc          *desc = NULL;
        size_t                  xfer_count;
        unsigned int            dwidth;
@@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
        if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
                return NULL;
 
+       first = xt->sgl;
+
        dev_info(chan2dev(chan),
                 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
                __func__, &xt->src_start, &xt->dst_start, xt->numf,
index d50273fed715096ac625382f6c511f537da57bf4..afd5e10f8927cb0c5573bb946a48755aad58b0aa 100644 (file)
@@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
        ret = dma_async_device_register(dd);
        if (ret)
-               return ret;
+               goto err_clk;
 
        irq = platform_get_irq(pdev, 0);
        ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
@@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
 err_unregister:
        dma_async_device_unregister(dd);
+err_clk:
+       clk_disable_unprepare(dmadev->clk);
        return ret;
 }
 
index 47edc7fbf91f52e5259060824c38eaab69ebdb56..ec5f9d2bc8202f340c615cbe43731016d316d547 100644 (file)
@@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
 #define PATTERN_COUNT_MASK     0x1f
 #define PATTERN_MEMSET_IDX     0x01
 
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+       bool                    done;
+       wait_queue_head_t       *wait;
+};
+
 struct dmatest_thread {
        struct list_head        node;
        struct dmatest_info     *info;
@@ -165,6 +171,8 @@ struct dmatest_thread {
        u8                      **dsts;
        u8                      **udsts;
        enum dma_transaction_type type;
+       wait_queue_head_t done_wait;
+       struct dmatest_done test_done;
        bool                    done;
 };
 
@@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
        return error_count;
 }
 
-/* poor man's completion - we want to use wait_event_freezable() on it */
-struct dmatest_done {
-       bool                    done;
-       wait_queue_head_t       *wait;
-};
 
 static void dmatest_callback(void *arg)
 {
        struct dmatest_done *done = arg;
-
-       done->done = true;
-       wake_up_all(done->wait);
+       struct dmatest_thread *thread =
+               container_of(arg, struct dmatest_thread, done_wait);
+       if (!thread->done) {
+               done->done = true;
+               wake_up_all(done->wait);
+       } else {
+               /*
+                * If thread->done, it means that this callback occurred
+                * after the parent thread has cleaned up. This can
+                * happen when the driver doesn't implement the
+                * terminate_all() functionality and a DMA operation
+                * did not complete within the timeout period.
+                */
+               WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
+       }
 }
 
 static unsigned int min_odd(unsigned int x, unsigned int y)
@@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
  */
 static int dmatest_func(void *data)
 {
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
        struct dmatest_thread   *thread = data;
-       struct dmatest_done     done = { .wait = &done_wait };
+       struct dmatest_done     *done = &thread->test_done;
        struct dmatest_info     *info;
        struct dmatest_params   *params;
        struct dma_chan         *chan;
@@ -673,9 +687,9 @@ static int dmatest_func(void *data)
                        continue;
                }
 
-               done.done = false;
+               done->done = false;
                tx->callback = dmatest_callback;
-               tx->callback_param = &done;
+               tx->callback_param = done;
                cookie = tx->tx_submit(tx);
 
                if (dma_submit_error(cookie)) {
@@ -688,21 +702,12 @@ static int dmatest_func(void *data)
                }
                dma_async_issue_pending(chan);
 
-               wait_event_freezable_timeout(done_wait, done.done,
+               wait_event_freezable_timeout(thread->done_wait, done->done,
                                             msecs_to_jiffies(params->timeout));
 
                status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 
-               if (!done.done) {
-                       /*
-                        * We're leaving the timed out dma operation with
-                        * dangling pointer to done_wait.  To make this
-                        * correct, we'll need to allocate wait_done for
-                        * each test iteration and perform "who's gonna
-                        * free it this time?" dancing.  For now, just
-                        * leave it dangling.
-                        */
-                       WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
+               if (!done->done) {
                        dmaengine_unmap_put(um);
                        result("test timed out", total_tests, src_off, dst_off,
                               len, 0);
@@ -789,7 +794,7 @@ err_thread_type:
                dmatest_KBs(runtime, total_len), ret);
 
        /* terminate all transfers on specified channels */
-       if (ret)
+       if (ret || failed_tests)
                dmaengine_terminate_all(chan);
 
        thread->done = true;
@@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
                thread->info = info;
                thread->chan = dtc->chan;
                thread->type = type;
+               thread->test_done.wait = &thread->done_wait;
+               init_waitqueue_head(&thread->done_wait);
                smp_wmb();
                thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
                                dma_chan_name(chan), op, i);
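
The core idea of the dmatest change above is that the completion state and its wait queue now live in the long-lived dmatest_thread instead of on the stack of the waiting function, so a callback that arrives after a timeout still writes into valid memory, and a callback arriving after teardown is detected via thread->done. A rough userspace analogue using pthreads (all names here are illustrative, not kernel API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct test_thread {
        pthread_mutex_t lock;
        pthread_cond_t  done_wait;      /* stands in for thread->done_wait */
        bool            test_done;      /* stands in for thread->test_done */
        bool            exiting;        /* stands in for thread->done */
};

/* like dmatest_callback(): only signal if the waiter still exists */
static void callback(struct test_thread *t)
{
        pthread_mutex_lock(&t->lock);
        if (!t->exiting) {
                t->test_done = true;
                pthread_cond_broadcast(&t->done_wait);
        } else {
                fprintf(stderr, "late callback after cleanup\n");
        }
        pthread_mutex_unlock(&t->lock);
}

static void *worker(void *arg)
{
        sleep(1);                       /* stand-in for the DMA transfer */
        callback(arg);
        return NULL;
}

int main(void)
{
        struct test_thread t = {
                PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
                false, false
        };
        pthread_t tid;

        pthread_create(&tid, NULL, worker, &t);
        pthread_mutex_lock(&t.lock);
        while (!t.test_done)            /* completion state outlives the wait */
                pthread_cond_wait(&t.done_wait, &t.lock);
        pthread_mutex_unlock(&t.lock);
        pthread_join(tid, NULL);
        puts("transfer completed");
        return 0;
}
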
index 6775f2c74e25b7269417bbe001adfb03698dea97..c7568869284e17d4b63379b236a0f30391640820 100644 (file)
@@ -863,11 +863,11 @@ static void fsl_edma_irq_exit(
        }
 }
 
-static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma)
+static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
 {
        int i;
 
-       for (i = 0; i < DMAMUX_NR; i++)
+       for (i = 0; i < nr_clocks; i++)
                clk_disable_unprepare(fsl_edma->muxclk[i]);
 }
 
@@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev)
 
                res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
                fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
-               if (IS_ERR(fsl_edma->muxbase[i]))
+               if (IS_ERR(fsl_edma->muxbase[i])) {
+                       /* on error: disable all previously enabled clks */
+                       fsl_disable_clocks(fsl_edma, i);
                        return PTR_ERR(fsl_edma->muxbase[i]);
+               }
 
                sprintf(clkname, "dmamux%d", i);
                fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
                if (IS_ERR(fsl_edma->muxclk[i])) {
                        dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
+                       /* on error: disable all previously enabled clks */
+                       fsl_disable_clocks(fsl_edma, i);
                        return PTR_ERR(fsl_edma->muxclk[i]);
                }
 
                ret = clk_prepare_enable(fsl_edma->muxclk[i]);
-               if (ret) {
-                       /* disable only clks which were enabled on error */
-                       for (; i >= 0; i--)
-                               clk_disable_unprepare(fsl_edma->muxclk[i]);
-
-                       dev_err(&pdev->dev, "DMAMUX clk block failed.\n");
-                       return ret;
-               }
+               if (ret)
+                       /* on error: disable all previously enabled clks */
+                       fsl_disable_clocks(fsl_edma, i);
 
        }
 
@@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
        if (ret) {
                dev_err(&pdev->dev,
                        "Can't register Freescale eDMA engine. (%d)\n", ret);
-               fsl_disable_clocks(fsl_edma);
+               fsl_disable_clocks(fsl_edma, DMAMUX_NR);
                return ret;
        }
 
@@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
                dev_err(&pdev->dev,
                        "Can't register Freescale eDMA of_dma. (%d)\n", ret);
                dma_async_device_unregister(&fsl_edma->dma_dev);
-               fsl_disable_clocks(fsl_edma);
+               fsl_disable_clocks(fsl_edma, DMAMUX_NR);
                return ret;
        }
 
@@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev)
        fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
        of_dma_controller_free(np);
        dma_async_device_unregister(&fsl_edma->dma_dev);
-       fsl_disable_clocks(fsl_edma);
+       fsl_disable_clocks(fsl_edma, DMAMUX_NR);
 
        return 0;
 }
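
The fsl-edma fix above is an instance of a common probe-time pattern: when acquiring the i-th resource fails, release exactly the i resources acquired before it, and release all of them on later failure paths. A toy standalone version, with placeholder enable()/disable() helpers rather than the clk API:

#include <stdio.h>

#define NR_RES 4

static int enable(int i)   { return i == 2 ? -1 : 0; }  /* pretend #2 fails */
static void disable(int i) { printf("disable %d\n", i); }

static void disable_all(int nr_enabled)
{
        int i;

        for (i = 0; i < nr_enabled; i++)
                disable(i);
}

static int probe(void)
{
        int i;

        for (i = 0; i < NR_RES; i++) {
                if (enable(i)) {
                        disable_all(i); /* only what was already enabled */
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        printf("probe: %d\n", probe());
        return 0;
}
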
index 2f31d3d0caa61821aa08aea360e06709bdb25d48..7792a9186f9cf35bae71792e5e0783cf53364b05 100644 (file)
@@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
        if (memcmp(src, dest, IOAT_TEST_SIZE)) {
                dev_err(dev, "Self-test copy failed compare, disabling\n");
                err = -ENODEV;
-               goto free_resources;
+               goto unmap_dma;
        }
 
 unmap_dma:
index da43813d67a4ad56ddecb79ac0a749afe29abc43..5aeb5f8816f3b9a68666cf57372cddeb12c2b36a 100644 (file)
@@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                                  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
                                  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
                amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
index f71fe6d2ddda795fd2fb914740b75845893c1298..bb5fa895fb6446097580ce229ef23dc473f979af 100644 (file)
@@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                       const struct dm_connector_state *dm_state)
 {
        struct drm_display_mode *preferred_mode = NULL;
-       const struct drm_connector *drm_connector;
+       struct drm_connector *drm_connector;
        struct dc_stream_state *stream = NULL;
        struct drm_display_mode mode = *drm_mode;
        bool native_mode_found = false;
@@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (!aconnector->dc_sink) {
                /*
-                * Exclude MST from creating fake_sink
-                * TODO: need to enable MST into fake_sink feature
+                * Create a dc_sink when necessary for MST
+                * Don't apply fake_sink to MST
                 */
-               if (aconnector->mst_port)
-                       goto stream_create_fail;
+               if (aconnector->mst_port) {
+                       dm_dp_mst_dc_sink_create(drm_connector);
+                       goto mst_dc_sink_create_done;
+               }
 
                if (create_fake_sink(aconnector))
                        goto stream_create_fail;
@@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 stream_create_fail:
 dm_state_null:
 drm_connector_null:
+mst_dc_sink_create_done:
        return stream;
 }
 
index 117521c6a6ed26213c60ec5316df64b66eeaba12..0230250a1164bb01b41f3a2b22011960909e14bb 100644 (file)
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
        struct mutex hpd_lock;
 
        bool fake_enable;
+
+       bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
index f8efb98b1fa72f86ecbec4c568a653af164c7daa..638c2c2b5cd79069e7312b7d7f23a28b6f5eb3b6 100644 (file)
@@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
        return ret;
 }
 
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
+{
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+       struct edid *edid;
+       struct dc_sink *dc_sink;
+       struct dc_sink_init_data init_params = {
+                       .link = aconnector->dc_link,
+                       .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+
+       edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
+
+       if (!edid) {
+               drm_mode_connector_update_edid_property(
+                       &aconnector->base,
+                       NULL);
+               return;
+       }
+
+       aconnector->edid = edid;
+
+       dc_sink = dc_link_add_remote_sink(
+               aconnector->dc_link,
+               (uint8_t *)aconnector->edid,
+               (aconnector->edid->extensions + 1) * EDID_LENGTH,
+               &init_params);
+
+       dc_sink->priv = aconnector;
+       aconnector->dc_sink = dc_sink;
+
+       amdgpu_dm_add_sink_to_freesync_module(
+                       connector, aconnector->edid);
+
+       drm_mode_connector_update_edid_property(
+                                       &aconnector->base, aconnector->edid);
+}
+
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                        drm_mode_connector_set_path_property(connector, pathprop);
 
                        drm_connector_list_iter_end(&conn_iter);
+                       aconnector->mst_connected = true;
                        return &aconnector->base;
                }
        }
@@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
         */
        amdgpu_dm_connector_funcs_reset(connector);
 
+       aconnector->mst_connected = true;
+
        DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
                        aconnector, connector->base.id, aconnector->mst_port);
 
@@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
        drm_mode_connector_update_edid_property(
                        &aconnector->base,
                        NULL);
+
+       aconnector->mst_connected = false;
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
        drm_kms_helper_hotplug_event(dev);
 }
 
+static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
+{
+       mutex_lock(&connector->dev->mode_config.mutex);
+       drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
+       mutex_unlock(&connector->dev->mode_config.mutex);
+}
+
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
        if (adev->mode_info.rfbdev)
                drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
 
        drm_connector_register(connector);
 
+       if (aconnector->mst_connected)
+               dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
index 2da851b40042aee9b79eb2c666d45c0f5061fee0..8cf51da26657e29e72062b34aeed7e5d827f9e21 100644 (file)
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
                                       struct amdgpu_dm_connector *aconnector);
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
index 3dce35e66b0917d2ec93420063b3477443788302..b142629a105841b603501291800e45b9ade30591 100644 (file)
@@ -900,6 +900,15 @@ bool dcn_validate_bandwidth(
                        v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
                        v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
                        v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
+                       /*
+                        * The spreadsheet doesn't handle taps_c of one properly;
+                        * need to force Chroma to always be scaled to pass
+                        * bandwidth validation.
+                        */
+                       if (v->override_hta_pschroma[input_idx] == 1)
+                               v->override_hta_pschroma[input_idx] = 2;
+                       if (v->override_vta_pschroma[input_idx] == 1)
+                               v->override_vta_pschroma[input_idx] = 2;
                        v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
                }
                if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
index e27ed4a45265290690604b10e6d4df4fbee77514..42a111b9505dcb5190437a381c7dba8fda444719 100644 (file)
@@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
                link->link_enc->funcs->disable_output(link->link_enc, signal, link);
 }
 
-bool dp_active_dongle_validate_timing(
+static bool dp_active_dongle_validate_timing(
                const struct dc_crtc_timing *timing,
                const struct dc_dongle_caps *dongle_caps)
 {
@@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing(
        /* Check Color Depth and Pixel Clock */
        if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
                required_pix_clk /= 2;
+       else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+               required_pix_clk = required_pix_clk * 2 / 3;
 
        switch (timing->display_color_depth) {
        case COLOR_DEPTH_666:
index 07ff8d2faf3f4630276d9241092f605274375cda..d844fadcd56f048739e374cb8d534cba10d235b3 100644 (file)
@@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface(
                int num_planes,
                struct dc_state *context)
 {
-       int i, be_idx;
+       int i;
 
        if (num_planes == 0)
                return;
 
-       be_idx = -1;
        for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (stream == context->res_ctx.pipe_ctx[i].stream) {
-                       be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst;
-                       break;
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if (stream == pipe_ctx->stream) {
+                       if (!pipe_ctx->top_pipe &&
+                               (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                               dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
                }
        }
 
@@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface(
                                        context->stream_count);
 
                dce110_program_front_end_for_pipe(dc, pipe_ctx);
+
+               dc->hwss.update_plane_addr(dc, pipe_ctx);
+
                program_surface_visibility(dc, pipe_ctx);
 
        }
+
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if ((stream == pipe_ctx->stream) &&
+                       (!pipe_ctx->top_pipe) &&
+                       (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                       dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
+       }
 }
 
 static void dce110_power_down_fe(struct dc *dc, int fe_idx)
index 74e7c82bdc76a71080d8e22dd6db90179ffee54e..a9d55d0dd69e009f4a31038c9529c86c05904ec7 100644 (file)
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
                        scl_data->taps.h_taps = 1;
                if (IDENTITY_RATIO(scl_data->ratios.vert))
                        scl_data->taps.v_taps = 1;
-               /*
-                * Spreadsheet doesn't handle taps_c is one properly,
-                * need to force Chroma to always be scaled to pass
-                * bandwidth validation.
-                */
+               if (IDENTITY_RATIO(scl_data->ratios.horz_c))
+                       scl_data->taps.h_taps_c = 1;
+               if (IDENTITY_RATIO(scl_data->ratios.vert_c))
+                       scl_data->taps.v_taps_c = 1;
        }
 
        return true;
index 4820141379534fbeab9322da64139e08557b4a67..9ae236036e324c345177bcb8f9cd84b20110a582 100644 (file)
@@ -152,14 +152,23 @@ static void drm_connector_free(struct kref *kref)
        connector->funcs->destroy(connector);
 }
 
-static void drm_connector_free_work_fn(struct work_struct *work)
+void drm_connector_free_work_fn(struct work_struct *work)
 {
-       struct drm_connector *connector =
-               container_of(work, struct drm_connector, free_work);
-       struct drm_device *dev = connector->dev;
+       struct drm_connector *connector, *n;
+       struct drm_device *dev =
+               container_of(work, struct drm_device, mode_config.connector_free_work);
+       struct drm_mode_config *config = &dev->mode_config;
+       unsigned long flags;
+       struct llist_node *freed;
 
-       drm_mode_object_unregister(dev, &connector->base);
-       connector->funcs->destroy(connector);
+       spin_lock_irqsave(&config->connector_list_lock, flags);
+       freed = llist_del_all(&config->connector_free_list);
+       spin_unlock_irqrestore(&config->connector_list_lock, flags);
+
+       llist_for_each_entry_safe(connector, n, freed, free_node) {
+               drm_mode_object_unregister(dev, &connector->base);
+               connector->funcs->destroy(connector);
+       }
 }
 
 /**
@@ -191,8 +200,6 @@ int drm_connector_init(struct drm_device *dev,
        if (ret)
                return ret;
 
-       INIT_WORK(&connector->free_work, drm_connector_free_work_fn);
-
        connector->base.properties = &connector->properties;
        connector->dev = dev;
        connector->funcs = funcs;
@@ -547,10 +554,17 @@ EXPORT_SYMBOL(drm_connector_list_iter_begin);
  * actually release the connector when dropping our final reference.
  */
 static void
-drm_connector_put_safe(struct drm_connector *conn)
+__drm_connector_put_safe(struct drm_connector *conn)
 {
-       if (refcount_dec_and_test(&conn->base.refcount.refcount))
-               schedule_work(&conn->free_work);
+       struct drm_mode_config *config = &conn->dev->mode_config;
+
+       lockdep_assert_held(&config->connector_list_lock);
+
+       if (!refcount_dec_and_test(&conn->base.refcount.refcount))
+               return;
+
+       llist_add(&conn->free_node, &config->connector_free_list);
+       schedule_work(&config->connector_free_work);
 }
 
 /**
@@ -582,10 +596,10 @@ drm_connector_list_iter_next(struct drm_connector_list_iter *iter)
 
                /* loop until it's not a zombie connector */
        } while (!kref_get_unless_zero(&iter->conn->base.refcount));
-       spin_unlock_irqrestore(&config->connector_list_lock, flags);
 
        if (old_conn)
-               drm_connector_put_safe(old_conn);
+               __drm_connector_put_safe(old_conn);
+       spin_unlock_irqrestore(&config->connector_list_lock, flags);
 
        return iter->conn;
 }
@@ -602,9 +616,15 @@ EXPORT_SYMBOL(drm_connector_list_iter_next);
  */
 void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
 {
+       struct drm_mode_config *config = &iter->dev->mode_config;
+       unsigned long flags;
+
        iter->dev = NULL;
-       if (iter->conn)
-               drm_connector_put_safe(iter->conn);
+       if (iter->conn) {
+               spin_lock_irqsave(&config->connector_list_lock, flags);
+               __drm_connector_put_safe(iter->conn);
+               spin_unlock_irqrestore(&config->connector_list_lock, flags);
+       }
        lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
 }
 EXPORT_SYMBOL(drm_connector_list_iter_end);
@@ -1231,6 +1251,19 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector,
        if (edid)
                size = EDID_LENGTH * (1 + edid->extensions);
 
+       /* Set the display info, using edid if available, otherwise
+        * resetting the values to defaults. This duplicates the work
+        * done in drm_add_edid_modes, but that function is not
+        * consistently called before this one in all drivers and the
+        * computation is cheap enough that it seems better to
+        * duplicate it rather than attempt to ensure some arbitrary
+        * ordering of calls.
+        */
+       if (edid)
+               drm_add_display_info(connector, edid);
+       else
+               drm_reset_display_info(connector);
+
        drm_object_property_set_value(&connector->base,
                                      dev->mode_config.non_desktop_property,
                                      connector->display_info.non_desktop);
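
What the connector changes above implement is a deferred destruction scheme: a put that happens while the connector list is being iterated cannot destroy the object on the spot, so the object is pushed onto a lock-free llist and a single per-device work item later destroys everything queued there. A single-threaded toy sketch of that shape (no locking, illustrative names only):

#include <stdio.h>
#include <stdlib.h>

struct conn {
        int id;
        int refcount;
        struct conn *free_next;         /* analogous to free_node */
};

static struct conn *free_list;          /* analogous to connector_free_list */

static void conn_put_deferred(struct conn *c)
{
        if (--c->refcount)
                return;
        c->free_next = free_list;       /* llist_add() in the patch */
        free_list = c;
}

static void free_work(void)             /* the connector_free_work item */
{
        struct conn *c = free_list;

        free_list = NULL;               /* llist_del_all() in the patch */
        while (c) {
                struct conn *n = c->free_next;

                printf("destroying connector %d\n", c->id);
                free(c);
                c = n;
        }
}

int main(void)
{
        int i;

        for (i = 0; i < 3; i++) {
                struct conn *c = calloc(1, sizeof(*c));

                c->id = i;
                c->refcount = 1;
                conn_put_deferred(c);   /* last ref dropped, free deferred */
        }
        free_work();
        return 0;
}
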
index 9ebb8841778cc99095a2235ab8b1d89f654816f2..af00f42ba269b0da3111ac5be480688815875da8 100644 (file)
@@ -142,6 +142,7 @@ int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj,
                                    uint64_t value);
 int drm_connector_create_standard_properties(struct drm_device *dev);
 const char *drm_get_connector_force_name(enum drm_connector_force force);
+void drm_connector_free_work_fn(struct work_struct *work);
 
 /* IOCTL */
 int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
index 5dfe147638716730573d008edc65abfc92f6d75a..cb487148359a8dca321fae3920c1dd89dd140056 100644 (file)
@@ -1731,7 +1731,7 @@ EXPORT_SYMBOL(drm_edid_duplicate);
  *
  * Returns true if @vendor is in @edid, false otherwise
  */
-static bool edid_vendor(struct edid *edid, const char *vendor)
+static bool edid_vendor(const struct edid *edid, const char *vendor)
 {
        char edid_vendor[3];
 
@@ -1749,7 +1749,7 @@ static bool edid_vendor(struct edid *edid, const char *vendor)
  *
  * This tells subsequent routines what fixes they need to apply.
  */
-static u32 edid_get_quirks(struct edid *edid)
+static u32 edid_get_quirks(const struct edid *edid)
 {
        const struct edid_quirk *quirk;
        int i;
@@ -2813,7 +2813,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
 /*
  * Search EDID for CEA extension block.
  */
-static u8 *drm_find_edid_extension(struct edid *edid, int ext_id)
+static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id)
 {
        u8 *edid_ext = NULL;
        int i;
@@ -2835,12 +2835,12 @@ static u8 *drm_find_edid_extension(struct edid *edid, int ext_id)
        return edid_ext;
 }
 
-static u8 *drm_find_cea_extension(struct edid *edid)
+static u8 *drm_find_cea_extension(const struct edid *edid)
 {
        return drm_find_edid_extension(edid, CEA_EXT);
 }
 
-static u8 *drm_find_displayid_extension(struct edid *edid)
+static u8 *drm_find_displayid_extension(const struct edid *edid)
 {
        return drm_find_edid_extension(edid, DISPLAYID_EXT);
 }
@@ -4363,7 +4363,7 @@ drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db)
 }
 
 static void drm_parse_cea_ext(struct drm_connector *connector,
-                             struct edid *edid)
+                             const struct edid *edid)
 {
        struct drm_display_info *info = &connector->display_info;
        const u8 *edid_ext;
@@ -4397,11 +4397,33 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
        }
 }
 
-static void drm_add_display_info(struct drm_connector *connector,
-                                struct edid *edid, u32 quirks)
+/* The connector has no EDID information, so there is no EDID to compute
+ * quirks from. Reset all of the values which would have been set from EDID.
+ */
+void
+drm_reset_display_info(struct drm_connector *connector)
 {
        struct drm_display_info *info = &connector->display_info;
 
+       info->width_mm = 0;
+       info->height_mm = 0;
+
+       info->bpc = 0;
+       info->color_formats = 0;
+       info->cea_rev = 0;
+       info->max_tmds_clock = 0;
+       info->dvi_dual = false;
+
+       info->non_desktop = 0;
+}
+EXPORT_SYMBOL_GPL(drm_reset_display_info);
+
+u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid)
+{
+       struct drm_display_info *info = &connector->display_info;
+
+       u32 quirks = edid_get_quirks(edid);
+
        info->width_mm = edid->width_cm * 10;
        info->height_mm = edid->height_cm * 10;
 
@@ -4414,11 +4436,13 @@ static void drm_add_display_info(struct drm_connector *connector,
 
        info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP);
 
+       DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop);
+
        if (edid->revision < 3)
-               return;
+               return quirks;
 
        if (!(edid->input & DRM_EDID_INPUT_DIGITAL))
-               return;
+               return quirks;
 
        drm_parse_cea_ext(connector, edid);
 
@@ -4438,7 +4462,7 @@ static void drm_add_display_info(struct drm_connector *connector,
 
        /* Only defined for 1.4 with digital displays */
        if (edid->revision < 4)
-               return;
+               return quirks;
 
        switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) {
        case DRM_EDID_DIGITAL_DEPTH_6:
@@ -4473,7 +4497,9 @@ static void drm_add_display_info(struct drm_connector *connector,
                info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
        if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422)
                info->color_formats |= DRM_COLOR_FORMAT_YCRCB422;
+       return quirks;
 }
+EXPORT_SYMBOL_GPL(drm_add_display_info);
 
 static int validate_displayid(u8 *displayid, int length, int idx)
 {
@@ -4627,14 +4653,12 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
                return 0;
        }
 
-       quirks = edid_get_quirks(edid);
-
        /*
         * CEA-861-F adds ycbcr capability map block, for HDMI 2.0 sinks.
         * To avoid multiple parsing of same block, lets parse that map
         * from sink info, before parsing CEA modes.
         */
-       drm_add_display_info(connector, edid, quirks);
+       quirks = drm_add_display_info(connector, edid);
 
        /*
         * EDID spec says modes should be preferred in this order:
index d1eb56a1eff4078d0d29b33ac2f17b06c4ae0d76..1402c0e71b03d18866139056b12f0d5fd84b6afb 100644 (file)
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 
        mutex_lock(&dev->mode_config.idr_mutex);
 
-       /* Insert the new lessee into the tree */
-       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
-       if (id < 0) {
-               error = id;
-               goto out_lessee;
-       }
-
-       lessee->lessee_id = id;
-       lessee->lessor = drm_master_get(lessor);
-       list_add_tail(&lessee->lessee_list, &lessor->lessees);
-
        idr_for_each_entry(leases, entry, object) {
                error = 0;
                if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
                }
        }
 
+       /* Insert the new lessee into the tree */
+       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
+       if (id < 0) {
+               error = id;
+               goto out_lessee;
+       }
+
+       lessee->lessee_id = id;
+       lessee->lessor = drm_master_get(lessor);
+       list_add_tail(&lessee->lessee_list, &lessor->lessees);
+
        /* Move the leases over */
        lessee->leases = *leases;
        DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
@@ -254,10 +254,10 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
        return lessee;
 
 out_lessee:
-       drm_master_put(&lessee);
-
        mutex_unlock(&dev->mode_config.idr_mutex);
 
+       drm_master_put(&lessee);
+
        return ERR_PTR(error);
 }
 
index 61a1c8ea74bc5838b905a67e55cb80203f8ef58f..c3c79ee6119e0cbaf2e7ba0ebadd6904adefbb8d 100644 (file)
@@ -575,21 +575,23 @@ EXPORT_SYMBOL(drm_mm_remove_node);
  */
 void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
+       struct drm_mm *mm = old->mm;
+
        DRM_MM_BUG_ON(!old->allocated);
 
        *new = *old;
 
        list_replace(&old->node_list, &new->node_list);
-       rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root);
+       rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree);
 
        if (drm_mm_hole_follows(old)) {
                list_replace(&old->hole_stack, &new->hole_stack);
                rb_replace_node(&old->rb_hole_size,
                                &new->rb_hole_size,
-                               &old->mm->holes_size);
+                               &mm->holes_size);
                rb_replace_node(&old->rb_hole_addr,
                                &new->rb_hole_addr,
-                               &old->mm->holes_addr);
+                               &mm->holes_addr);
        }
 
        old->allocated = false;
index cc78b3d9e5e4896d87728de290c440c444f19253..256de731361219b976146f0dc398d9a4dfa4dd9e 100644 (file)
@@ -382,6 +382,9 @@ void drm_mode_config_init(struct drm_device *dev)
        ida_init(&dev->mode_config.connector_ida);
        spin_lock_init(&dev->mode_config.connector_list_lock);
 
+       init_llist_head(&dev->mode_config.connector_free_list);
+       INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn);
+
        drm_mode_create_standard_properties(dev);
 
        /* Just to be sure */
@@ -432,7 +435,7 @@ void drm_mode_config_cleanup(struct drm_device *dev)
        }
        drm_connector_list_iter_end(&conn_iter);
        /* connector_iter drops references in a work item. */
-       flush_scheduled_work();
+       flush_work(&dev->mode_config.connector_free_work);
        if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) {
                drm_connector_list_iter_begin(dev, &conn_iter);
                drm_for_each_connector_iter(connector, &conn_iter)
index 37a93cdffb4ad0e7986a634df4d70ccc3fef286e..2c90519576a3e8b63a4c8361f18672db853ebcec 100644 (file)
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
 }
 
 /*
- * setplane_internal - setplane handler for internal callers
+ * __setplane_internal - setplane handler for internal callers
  *
- * Note that we assume an extra reference has already been taken on fb.  If the
- * update fails, this reference will be dropped before return; if it succeeds,
- * the previous framebuffer (if any) will be unreferenced instead.
+ * This function will take a reference on the new fb for the plane
+ * on success.
  *
  * src_{x,y,w,h} are provided in 16.16 fixed point format
  */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
        if (!ret) {
                plane->crtc = crtc;
                plane->fb = fb;
-               fb = NULL;
+               drm_framebuffer_get(plane->fb);
        } else {
                plane->old_fb = NULL;
        }
 
 out:
-       if (fb)
-               drm_framebuffer_put(fb);
        if (plane->old_fb)
                drm_framebuffer_put(plane->old_fb);
        plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
        struct drm_plane *plane;
        struct drm_crtc *crtc = NULL;
        struct drm_framebuffer *fb = NULL;
+       int ret;
 
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
                }
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
-       return setplane_internal(plane, crtc, fb,
-                                plane_req->crtc_x, plane_req->crtc_y,
-                                plane_req->crtc_w, plane_req->crtc_h,
-                                plane_req->src_x, plane_req->src_y,
-                                plane_req->src_w, plane_req->src_h);
+       ret = setplane_internal(plane, crtc, fb,
+                               plane_req->crtc_x, plane_req->crtc_y,
+                               plane_req->crtc_w, plane_req->crtc_h,
+                               plane_req->src_x, plane_req->src_y,
+                               plane_req->src_w, plane_req->src_h);
+
+       if (fb)
+               drm_framebuffer_put(fb);
+
+       return ret;
 }
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
                src_h = fb->height << 16;
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
        ret = __setplane_internal(crtc->cursor, crtc, fb,
-                               crtc_x, crtc_y, crtc_w, crtc_h,
-                               0, 0, src_w, src_h, ctx);
+                                 crtc_x, crtc_y, crtc_w, crtc_h,
+                                 0, 0, src_w, src_h, ctx);
+
+       if (fb)
+               drm_framebuffer_put(fb);
 
        /* Update successful; save new cursor position, if necessary */
        if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
index f776fc1cc543abf8e752a5133aaf1ca63fb2d8ff..cb4d09c70fd44647f30b6d10244f25e90db0835f 100644 (file)
@@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = {
        .release = drm_syncobj_file_release,
 };
 
-static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
-{
-       struct file *file = anon_inode_getfile("syncobj_file",
-                                              &drm_syncobj_file_fops,
-                                              syncobj, 0);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       drm_syncobj_get(syncobj);
-       if (cmpxchg(&syncobj->file, NULL, file)) {
-               /* lost the race */
-               fput(file);
-       }
-
-       return 0;
-}
-
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
 {
-       int ret;
+       struct file *file;
        int fd;
 
        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
                return fd;
 
-       if (!syncobj->file) {
-               ret = drm_syncobj_alloc_file(syncobj);
-               if (ret) {
-                       put_unused_fd(fd);
-                       return ret;
-               }
+       file = anon_inode_getfile("syncobj_file",
+                                 &drm_syncobj_file_fops,
+                                 syncobj, 0);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               return PTR_ERR(file);
        }
-       fd_install(fd, syncobj->file);
+
+       drm_syncobj_get(syncobj);
+       fd_install(fd, file);
+
        *p_fd = fd;
        return 0;
 }
@@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
        return ret;
 }
 
-static struct drm_syncobj *drm_syncobj_fdget(int fd)
-{
-       struct file *file = fget(fd);
-
-       if (!file)
-               return NULL;
-       if (file->f_op != &drm_syncobj_file_fops)
-               goto err;
-
-       return file->private_data;
-err:
-       fput(file);
-       return NULL;
-};
-
 static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
                                    int fd, u32 *handle)
 {
-       struct drm_syncobj *syncobj = drm_syncobj_fdget(fd);
+       struct drm_syncobj *syncobj;
+       struct file *file;
        int ret;
 
-       if (!syncobj)
+       file = fget(fd);
+       if (!file)
                return -EINVAL;
 
+       if (file->f_op != &drm_syncobj_file_fops) {
+               fput(file);
+               return -EINVAL;
+       }
+
        /* take a reference to put in the idr */
+       syncobj = file->private_data;
        drm_syncobj_get(syncobj);
 
        idr_preload(GFP_KERNEL);
@@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
        spin_unlock(&file_private->syncobj_table_lock);
        idr_preload_end();
 
-       if (ret < 0) {
-               fput(syncobj->file);
-               return ret;
-       }
-       *handle = ret;
-       return 0;
+       if (ret > 0) {
+               *handle = ret;
+               ret = 0;
+       } else
+               drm_syncobj_put(syncobj);
+
+       fput(file);
+       return ret;
 }
 
 static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
index 2615912430cc97098f0fe806e95e5e40c1ee96f7..435ff8662cfa823a56f5d84a8fe66d4bc8929230 100644 (file)
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                /* Determine if we can get a cache-coherent map, forcing
                 * uncached mapping if we can't.
                 */
-               if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
+               if (!nouveau_drm_use_coherent_gpu_mapping(drm))
                        nvbo->force_coherent = true;
        }
 
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
                    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
                        continue;
-               if ((flags & TTM_PL_FLAG_TT  ) && !vmm->page[i].host)
+               if ((flags & TTM_PL_FLAG_TT) &&
+                   (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
                        continue;
 
                /* Select this page size if it's the first that supports
index 8d4a5be3b913016c410c8226ad5b05b0bf75299b..56fe261b62683a8690ac8f3439426ca1e4c269ef 100644 (file)
@@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
        work->cli = cli;
        mutex_lock(&cli->lock);
        list_add_tail(&work->head, &cli->worker);
-       mutex_unlock(&cli->lock);
        if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence))
                nouveau_cli_work_fence(fence, &work->cb);
+       mutex_unlock(&cli->lock);
 }
 
 static void
index 3331e82ae9e7130b18f4a6f307cc284519f873d3..96f6bd8aee5d3a248d76c683b6146ebb8ef673c7 100644 (file)
@@ -157,8 +157,8 @@ struct nouveau_drm {
                struct nvif_object copy;
                int mtrr;
                int type_vram;
-               int type_host;
-               int type_ncoh;
+               int type_host[2];
+               int type_ncoh[2];
        } ttm;
 
        /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
        return dev->dev_private;
 }
 
+static inline bool
+nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
+{
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
+}
+
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 bool nouveau_pmops_runtime(void);
index c533d8e04afc0f1fc4708d85e069323291c428c3..be7357bf2246e6ae326c9b6750c2c183cb0974d9 100644 (file)
@@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
        drm_fb_helper_unregister_fbi(&fbcon->helper);
        drm_fb_helper_fini(&fbcon->helper);
 
-       if (nouveau_fb->nvbo) {
+       if (nouveau_fb && nouveau_fb->nvbo) {
                nouveau_vma_del(&nouveau_fb->vma);
                nouveau_bo_unmap(nouveau_fb->nvbo);
                nouveau_bo_unpin(nouveau_fb->nvbo);
index 589a9621db763f98454485081a3f80e2324e717c..c002f896850739b343624247e7d52d94e34bf99d 100644 (file)
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
        u8 type;
        int ret;
 
-       if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
-               type = drm->ttm.type_ncoh;
+       if (!nouveau_drm_use_coherent_gpu_mapping(drm))
+               type = drm->ttm.type_ncoh[!!mem->kind];
        else
-               type = drm->ttm.type_host;
+               type = drm->ttm.type_host[0];
 
        if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
                mem->comp = mem->kind = 0;
index 08b974b3048279813e2d67ad0d5b0055e68998c2..dff51a0ee0281e8f5924ffc0135d8b4baf8542f9 100644 (file)
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
        drm->ttm.mem_global_ref.release = NULL;
 }
 
-int
-nouveau_ttm_init(struct nouveau_drm *drm)
+static int
+nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
 {
-       struct nvkm_device *device = nvxx_device(&drm->client.device);
-       struct nvkm_pci *pci = device->pci;
        struct nvif_mmu *mmu = &drm->client.mmu;
-       struct drm_device *dev = drm->dev;
-       int typei, ret;
+       int typei;
 
        typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
-                                                  NVIF_MEM_COHERENT);
+                                           kind | NVIF_MEM_COHERENT);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_host = typei;
+       drm->ttm.type_host[!!kind] = typei;
 
-       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE);
+       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_ncoh = typei;
+       drm->ttm.type_ncoh[!!kind] = typei;
+       return 0;
+}
+
+int
+nouveau_ttm_init(struct nouveau_drm *drm)
+{
+       struct nvkm_device *device = nvxx_device(&drm->client.device);
+       struct nvkm_pci *pci = device->pci;
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       struct drm_device *dev = drm->dev;
+       int typei, ret;
+
+       ret = nouveau_ttm_init_host(drm, 0);
+       if (ret)
+               return ret;
+
+       if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+           drm->client.device.info.chipset != 0x50) {
+               ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
+               if (ret)
+                       return ret;
+       }
 
        if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
            drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
index 9e2628dd8e4d6734c2d7c5d073012bbb95b4fa4c..f5371d96b003c23cac9e1f34cf3deca3b54b06a6 100644 (file)
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
                        nvif_vmm_put(&vma->vmm->vmm, &tmp);
                }
                list_del(&vma->head);
-               *pvma = NULL;
                kfree(*pvma);
+               *pvma = NULL;
        }
 }
 
index e146436156985a534fa14e0829db4560d6eb1459..00eeaaffeae565a04044fc55e52990eb71d1063b 100644 (file)
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
        .imem = gk20a_instmem_new,
        .ltc = gp100_ltc_new,
        .mc = gp10b_mc_new,
-       .mmu = gf100_mmu_new,
+       .mmu = gp10b_mmu_new,
        .secboot = gp10b_secboot_new,
        .pmu = gm20b_pmu_new,
        .timer = gk20a_timer_new,
index 972370ed36f090d0c0323253b79735edd355db07..7c7efa4ea0d0edb391a27db2c6e99179799070f1 100644 (file)
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
                        if (data) {
                                *ver = nvbios_rd08(bios, data + 0x00);
                                switch (*ver) {
+                               case 0x20:
                                case 0x21:
                                case 0x30:
                                case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
        if (data && idx < *cnt) {
                u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
                switch (*ver * !!outp) {
+               case 0x20:
                case 0x21:
                case 0x30:
                        *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
                info->type = nvbios_rd16(bios, data + 0x00);
                info->mask = nvbios_rd16(bios, data + 0x02);
                switch (*ver) {
+               case 0x20:
+                       info->mask |= 0x00c0; /* match any link */
+                       /* fall-through */
                case 0x21:
                case 0x30:
                        info->flags     = nvbios_rd08(bios, data + 0x05);
                        info->script[0] = nvbios_rd16(bios, data + 0x06);
                        info->script[1] = nvbios_rd16(bios, data + 0x08);
-                       info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
+                       if (*len >= 0x0c)
+                               info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
                        if (*len >= 0x0f) {
                                info->script[2] = nvbios_rd16(bios, data + 0x0c);
                                info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
        memset(info, 0x00, sizeof(*info));
        if (data) {
                switch (*ver) {
+               case 0x20:
                case 0x21:
                        info->dc    = nvbios_rd08(bios, data + 0x02);
                        info->pe    = nvbios_rd08(bios, data + 0x03);
index 1ba7289684aa2116b6fcc4d05869f0d2b8322a39..db48a1daca0c7a3d786332ce25435839fcc10760 100644 (file)
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
                        iobj->base.memory.ptrs = &nv50_instobj_fast;
                else
                        iobj->base.memory.ptrs = &nv50_instobj_slow;
-               refcount_inc(&iobj->maps);
+               refcount_set(&iobj->maps, 1);
        }
 
        mutex_unlock(&imem->subdev.mutex);
index b1b1f3626b96298fcdb76f1819fd7b97801d5b37..deb96de54b0030244ec88014bce526119c3fae91 100644 (file)
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
                return ret;
 
        pci->irq = pdev->irq;
+
+       /* Ensure MSI interrupts are armed, for the case where there are
+        * already interrupts pending (for whatever reason) at load time.
+        */
+       if (pci->msi)
+               pci->func->msi_rearm(pci);
+
        return ret;
 }
 
index dda904ec0534cd9d84d3967b94bf5fa4f444df9e..500b6fb3e0284d2fdfc71265a64f0d5b51fe4f99 100644 (file)
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
        writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
 }
 
+static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
+                                       const struct drm_display_mode *mode)
+{
+       struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
+       unsigned long rate = mode->clock * 1000;
+       unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
+       long rounded_rate;
+
+       /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
+       if (rate > 165000000)
+               return MODE_CLOCK_HIGH;
+       rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
+       if (rounded_rate > 0 &&
+           max_t(unsigned long, rounded_rate, rate) -
+           min_t(unsigned long, rounded_rate, rate) < diff)
+               return MODE_OK;
+       return MODE_NOCLOCK;
+}
+
 static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
        .atomic_check   = sun4i_hdmi_atomic_check,
        .disable        = sun4i_hdmi_disable,
        .enable         = sun4i_hdmi_enable,
        .mode_set       = sun4i_hdmi_mode_set,
+       .mode_valid     = sun4i_hdmi_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
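
The new mode_valid callback above accepts a mode only when the requested pixel clock stays below the 165 MHz limit and the TMDS clock can actually be rounded to within 0.5% of it. A standalone sketch of the same check, with a fake round_rate() standing in for clk_round_rate():

#include <stdbool.h>
#include <stdio.h>

/* stand-in for clk_round_rate(): pretend the PLL steps in 250 kHz */
static long round_rate(unsigned long rate)
{
        return (long)(rate / 250000) * 250000;
}

static bool mode_clock_ok(unsigned long rate)
{
        unsigned long diff = rate / 200;        /* 0.5% tolerance */
        long rounded = round_rate(rate);

        if (rate > 165000000)                   /* HDMI <= 1.2 limit */
                return false;
        if (rounded <= 0)
                return false;
        return ((unsigned long)rounded > rate ?
                (unsigned long)rounded - rate : rate - rounded) < diff;
}

int main(void)
{
        printf("148.5 MHz: %d\n", mode_clock_ok(148500000));
        printf("297 MHz:   %d\n", mode_clock_ok(297000000));
        return 0;
}
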
index e122f5b2a395583cc14302a9bc4166fbba671071..f4284b51bdca99a04e8eda109a4d67bf5c9fac74 100644 (file)
@@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
        if (IS_ERR(tcon->crtc)) {
                dev_err(dev, "Couldn't create our CRTC\n");
                ret = PTR_ERR(tcon->crtc);
-               goto err_free_clocks;
+               goto err_free_dotclock;
        }
 
        ret = sun4i_rgb_init(drm, tcon);
        if (ret < 0)
-               goto err_free_clocks;
+               goto err_free_dotclock;
 
        if (tcon->quirks->needs_de_be_mux) {
                /*
index 44343a2bf55c65458a196b5968b0c494f1c569b0..b5ba6441489f6e4f28f6e71129dfede3361bd262 100644 (file)
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                freed += (nr_free_pool - shrink_pages) << pool->order;
                if (freed >= sc->nr_to_scan)
                        break;
+               shrink_pages <<= pool->order;
        }
        mutex_unlock(&lock);
        return freed;
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
        int r = 0;
        unsigned i, j, cpages;
        unsigned npages = 1 << order;
-       unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC);
+       unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
 
        /* allocate array for page caching change */
        caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
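
Both ttm changes above are about keeping page-order bookkeeping consistent: an entry in an order-N pool represents 1 << N base pages, so counts exchanged with the shrinker (and allocation batch sizes) have to be scaled by the order. A tiny standalone illustration with made-up numbers:

#include <stdio.h>

static unsigned long shrink_pool(unsigned int order, unsigned long nr_entries,
                                 unsigned long nr_to_scan)
{
        unsigned long freed = 0;
        /* convert the base-page target into pool entries for this order */
        unsigned long entries_to_free = nr_to_scan >> order;

        if (entries_to_free > nr_entries)
                entries_to_free = nr_entries;

        freed += entries_to_free << order;      /* report base pages again */
        return freed;
}

int main(void)
{
        /* order-2 pool (4 base pages per entry), asked to scan 32 pages */
        printf("freed %lu base pages\n", shrink_pool(2, 100, 32));
        return 0;
}
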
index 6c32c89a83a96687d2a817522288ad205ffd5c63..638540943c61a5e095c87be8d2b2bf543ea933b1 100644 (file)
@@ -888,8 +888,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
        /* If we got force-completed because of GPU reset rather than
         * through our IRQ handler, signal the fence now.
         */
-       if (exec->fence)
+       if (exec->fence) {
                dma_fence_signal(exec->fence);
+               dma_fence_put(exec->fence);
+       }
 
        if (exec->bo) {
                for (i = 0; i < exec->bo_count; i++) {
index 61b2e5377993dc319cf6b362b66f0c3db9a768cd..26eddbb628936b91f20a000c405bfbc536324e89 100644 (file)
@@ -139,6 +139,7 @@ vc4_irq_finish_render_job(struct drm_device *dev)
        list_move_tail(&exec->head, &vc4->job_done_list);
        if (exec->fence) {
                dma_fence_signal_locked(exec->fence);
+               dma_fence_put(exec->fence);
                exec->fence = NULL;
        }
        vc4_submit_next_render_job(dev);
index bd126a7c6da2aa1c16aaa5456644198cacfb5222..7da75644c7507c1f6161ea784a318b1aef76aac4 100644 (file)
@@ -42,9 +42,11 @@ static struct stm_ftrace {
  * @len:       length of the data packet
  */
 static void notrace
-stm_ftrace_write(const void *buf, unsigned int len)
+stm_ftrace_write(struct trace_export *export, const void *buf, unsigned int len)
 {
-       stm_source_write(&stm_ftrace.data, STM_FTRACE_CHAN, buf, len);
+       struct stm_ftrace *stm = container_of(export, struct stm_ftrace, ftrace);
+
+       stm_source_write(&stm->data, STM_FTRACE_CHAN, buf, len);
 }
 
 static int stm_ftrace_link(struct stm_source_data *data)
index 0d05dadb2dc58a1d8599869cab82baed63ad165c..44cffad43701f4839096bbde5c5937ee22cce135 100644 (file)
@@ -379,7 +379,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct platform_device_id cht_wc_i2c_adap_id_table[] = {
+static const struct platform_device_id cht_wc_i2c_adap_id_table[] = {
        { .name = "cht_wcove_ext_chgr" },
        {},
 };
index 174579d32e5f39ecdc44d2c230b55fbfb5d073e2..462948e2c5354e64a09fa769eb1519f9d421d629 100644 (file)
@@ -983,7 +983,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 
        if (adapdata->smba) {
                i2c_del_adapter(adap);
-               if (adapdata->port == (0 << 1)) {
+               if (adapdata->port == (0 << piix4_port_shift_sb800)) {
                        release_region(adapdata->smba, SMBIOSIZE);
                        if (adapdata->sb800_main)
                                release_region(SB800_PIIX4_SMB_IDX, 2);
index dab51761f8c52b0aab12e4b49334aa1607d0a7a0..d4f9cef251acf457f1dba743e141dadc7dc59d1b 100644 (file)
@@ -1,10 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * i2c-stm32.h
  *
  * Copyright (C) M'boumba Cedric Madianga 2017
+ * Copyright (C) STMicroelectronics 2017
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
  *
- * License terms:  GNU General Public License (GPL), version 2
  */
 
 #ifndef _I2C_STM32_H
index 4ec108496f15cdf5dcedd3bad1b368cac5482bd8..47c8d00de53f95377e857633035118ba9ed5be31 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for STMicroelectronics STM32 I2C controller
  *
@@ -6,11 +7,11 @@
  * http://www.st.com/resource/en/reference_manual/DM00031020.pdf
  *
  * Copyright (C) M'boumba Cedric Madianga 2016
+ * Copyright (C) STMicroelectronics 2017
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
  *
  * This driver is based on i2c-st.c
  *
- * License terms:  GNU General Public License (GPL), version 2
  */
 
 #include <linux/clk.h>
index d4a6e9c2e9aaeaa679bb159ade420bcdb37e6988..b445b3bb0bb11fe262363042a2eb56e458f6ffc2 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for STMicroelectronics STM32F7 I2C controller
  *
@@ -7,11 +8,11 @@
  * http://www.st.com/resource/en/reference_manual/dm00124865.pdf
  *
  * Copyright (C) M'boumba Cedric Madianga 2017
+ * Copyright (C) STMicroelectronics 2017
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
  *
  * This driver is based on i2c-stm32f4.c
  *
- * License terms:  GNU General Public License (GPL), version 2
  */
 #include <linux/clk.h>
 #include <linux/delay.h>
index f6983357145de1adeedd2d8a1bb0401a848b34c0..6294a7001d33bee54b4c516fcfa5faff7556b28b 100644 (file)
@@ -4458,7 +4458,7 @@ out:
        return skb->len;
 }
 
-static const struct rdma_nl_cbs cma_cb_table[] = {
+static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
        [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
 };
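
This table and the similar netlink callback tables below now declare their size explicitly (RDMA_NL_RDMA_CM_NUM_OPS and friends), so a lookup indexed by opcode stays inside the array even for opcodes that have no initializer. A standalone illustration of the pattern with made-up ops:

#include <stdio.h>

enum { OP_GET, OP_SET, OP_DEL, NUM_OPS };

struct cb {
        int (*doit)(void);
};

static int do_get(void) { return 1; }

/* Sized table: entries without an initializer are zeroed, so a bounds
 * check against NUM_OPS plus a NULL check makes dispatch safe. */
static const struct cb cb_table[NUM_OPS] = {
        [OP_GET] = { .doit = do_get },
};

static int dispatch(unsigned int op)
{
        if (op >= NUM_OPS || !cb_table[op].doit)
                return -1;      /* unknown or unimplemented op */
        return cb_table[op].doit();
}

int main(void)
{
        printf("OP_GET -> %d, OP_DEL -> %d\n",
               dispatch(OP_GET), dispatch(OP_DEL));
        return 0;
}
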
 
index 5e1be4949d5fa8d4cfa4d802fa34a5ff71d4701b..30914f3baa5f1ee5b43695c96f56c54b8ce2a2df 100644 (file)
@@ -1146,7 +1146,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 }
 EXPORT_SYMBOL(ib_get_net_dev_by_params);
 
-static const struct rdma_nl_cbs ibnl_ls_cb_table[] = {
+static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
        [RDMA_NL_LS_OP_RESOLVE] = {
                .doit = ib_nl_handle_resolve_resp,
                .flags = RDMA_NL_ADMIN_PERM,
index e9e189ec7502ca90ccdb25a58ade3bb0167ee731..5d676cff41f496ce519f4dc000eda17f6fd43999 100644 (file)
@@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason)
 }
 EXPORT_SYMBOL(iwcm_reject_msg);
 
-static struct rdma_nl_cbs iwcm_nl_cb_table[] = {
+static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
        [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
        [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
        [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
index 2fae850a3eff6a92703aed33975af9b1e0fc3835..9a05245a1acf4a7fd010fb7f038e84d7bbf5003e 100644 (file)
@@ -303,7 +303,7 @@ out:        cb->args[0] = idx;
        return skb->len;
 }
 
-static const struct rdma_nl_cbs nldev_cb_table[] = {
+static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
        [RDMA_NLDEV_CMD_GET] = {
                .doit = nldev_get_doit,
                .dump = nldev_get_dumpit,
index a337386652b07381ec1f858ec1279fcba7128a8e..feafdb961c485c61e3842d6a946d83b1bf7176b8 100644 (file)
@@ -739,8 +739,11 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
        if (!rdma_protocol_ib(map->agent.device, map->agent.port_num))
                return 0;
 
-       if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed)
-               return -EACCES;
+       if (map->agent.qp->qp_type == IB_QPT_SMI) {
+               if (!map->agent.smp_allowed)
+                       return -EACCES;
+               return 0;
+       }
 
        return ib_security_pkey_access(map->agent.device,
                                       map->agent.port_num,
index 16d55710b1162aa11ca2d8ed3dd1c1690f60acc8..d0202bb176a4a6a826b27f2b4327691e334ad4ea 100644 (file)
@@ -1971,6 +1971,12 @@ static int modify_qp(struct ib_uverbs_file *file,
                goto release_qp;
        }
 
+       if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
+           !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) {
+               ret = -EINVAL;
+               goto release_qp;
+       }
+
        attr->qp_state            = cmd->base.qp_state;
        attr->cur_qp_state        = cmd->base.cur_qp_state;
        attr->path_mtu            = cmd->base.path_mtu;
index ea55e95cd2c5df33bf9de567eb685fae3cbaea1c..b7bfc536e00fd8c7b241c0f56539d1394d235acf 100644 (file)
@@ -395,6 +395,11 @@ next_cqe:
 
 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
 {
+       if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) {
+               WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
+               return 0;
+       }
+
        if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
                return 0;
 
index 5ee7fe433136bc22dc7a55ce9bad4930ee49ae1d..38bddd02a9437470e0f3ed98a7e55afbc8cc7384 100644 (file)
@@ -868,7 +868,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
        qhp = to_c4iw_qp(ibqp);
        spin_lock_irqsave(&qhp->lock, flag);
-       if (t4_wq_in_error(&qhp->wq)) {
+
+       /*
+        * If the qp has been flushed, then just insert a special
+        * drain cqe.
+        */
+       if (qhp->wq.flushed) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                complete_sq_drain_wr(qhp, wr);
                return err;
@@ -1011,7 +1016,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
        qhp = to_c4iw_qp(ibqp);
        spin_lock_irqsave(&qhp->lock, flag);
-       if (t4_wq_in_error(&qhp->wq)) {
+
+       /*
+        * If the qp has been flushed, then just insert a special
+        * drain cqe.
+        */
+       if (qhp->wq.flushed) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                complete_rq_drain_wr(qhp, wr);
                return err;
@@ -1285,21 +1295,21 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
        spin_unlock_irqrestore(&rchp->lock, flag);
 
        if (schp == rchp) {
-               if (t4_clear_cq_armed(&rchp->cq) &&
-                   (rq_flushed || sq_flushed)) {
+               if ((rq_flushed || sq_flushed) &&
+                   t4_clear_cq_armed(&rchp->cq)) {
                        spin_lock_irqsave(&rchp->comp_handler_lock, flag);
                        (*rchp->ibcq.comp_handler)(&rchp->ibcq,
                                                   rchp->ibcq.cq_context);
                        spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
                }
        } else {
-               if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+               if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
                        spin_lock_irqsave(&rchp->comp_handler_lock, flag);
                        (*rchp->ibcq.comp_handler)(&rchp->ibcq,
                                                   rchp->ibcq.cq_context);
                        spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
                }
-               if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+               if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
                        spin_lock_irqsave(&schp->comp_handler_lock, flag);
                        (*schp->ibcq.comp_handler)(&schp->ibcq,
                                                   schp->ibcq.cq_context);
index 013049bcdb53d5fb95ca61d1ab9640aaa62b0756..caf490ab24c809e403f07bf2471bd2e1290e36b7 100644 (file)
@@ -666,6 +666,19 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
                return (-EOPNOTSUPP);
        }
 
+       if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4      |
+                                         MLX4_IB_RX_HASH_DST_IPV4      |
+                                         MLX4_IB_RX_HASH_SRC_IPV6      |
+                                         MLX4_IB_RX_HASH_DST_IPV6      |
+                                         MLX4_IB_RX_HASH_SRC_PORT_TCP  |
+                                         MLX4_IB_RX_HASH_DST_PORT_TCP  |
+                                         MLX4_IB_RX_HASH_SRC_PORT_UDP  |
+                                         MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+               pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
+                        ucmd->rx_hash_fields_mask);
+               return (-EOPNOTSUPP);
+       }
+
        if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) &&
            (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
                rss_ctx->flags = MLX4_RSS_IPV4;
@@ -691,11 +704,11 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
                        return (-EOPNOTSUPP);
                }
 
-               if (rss_ctx->flags & MLX4_RSS_IPV4) {
+               if (rss_ctx->flags & MLX4_RSS_IPV4)
                        rss_ctx->flags |= MLX4_RSS_UDP_IPV4;
-               } else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+               if (rss_ctx->flags & MLX4_RSS_IPV6)
                        rss_ctx->flags |= MLX4_RSS_UDP_IPV6;
-               } else {
+               if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
                        pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n");
                        return (-EOPNOTSUPP);
                }
@@ -707,15 +720,14 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
 
        if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) &&
            (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
-               if (rss_ctx->flags & MLX4_RSS_IPV4) {
+               if (rss_ctx->flags & MLX4_RSS_IPV4)
                        rss_ctx->flags |= MLX4_RSS_TCP_IPV4;
-               } else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+               if (rss_ctx->flags & MLX4_RSS_IPV6)
                        rss_ctx->flags |= MLX4_RSS_TCP_IPV6;
-               } else {
+               if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
                        pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n");
                        return (-EOPNOTSUPP);
                }
-
        } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) ||
                   (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
                pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n");
index 87f4bd99cdf7102e1e6e43c23a03615103d13b2c..2c13123bfd69499e3ac7661871d176c57979664b 100644 (file)
@@ -1145,6 +1145,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
        noio_flag = memalloc_noio_save();
        p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring));
        if (!p->tx_ring) {
+               memalloc_noio_restore(noio_flag);
                ret = -ENOMEM;
                goto err_tx;
        }
index b8ac591aaaa7070bfbd6d32c20993fb9130961f8..c546b567f3b50a3f43b0c074e9319ca908ec5971 100644 (file)
@@ -1611,7 +1611,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
        int l;
        struct dm_buffer *b, *tmp;
        unsigned long freed = 0;
-       unsigned long count = nr_to_scan;
+       unsigned long count = c->n_buffers[LIST_CLEAN] +
+                             c->n_buffers[LIST_DIRTY];
        unsigned long retain_target = get_retain_buffers(c);
 
        for (l = 0; l < LIST_SIZE; l++) {
@@ -1647,8 +1648,11 @@ static unsigned long
 dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
+       unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
+                             READ_ONCE(c->n_buffers[LIST_DIRTY]);
+       unsigned long retain_target = get_retain_buffers(c);
 
-       return READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]);
+       return (count < retain_target) ? 0 : (count - retain_target);
 }
 
 /*
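
With this change dm_bufio_shrink_count() reports only the buffers above the retain target, so the shrinker never asks the scan path to walk buffers the client intends to keep anyway. The arithmetic on its own:

#include <stdio.h>

/* Freeable objects = total minus the number that must be retained. */
static unsigned long shrink_count(unsigned long total, unsigned long retain)
{
        return (total < retain) ? 0 : total - retain;
}

int main(void)
{
        printf("%lu %lu\n", shrink_count(100, 30), shrink_count(20, 30));
        return 0;
}
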
index cf23a14f9c6a6572955746f040802dcfd34d801d..47407e43b96a168eed8660a7284a60132284d189 100644 (file)
@@ -3472,18 +3472,18 @@ static int __init dm_cache_init(void)
 {
        int r;
 
-       r = dm_register_target(&cache_target);
-       if (r) {
-               DMERR("cache target registration failed: %d", r);
-               return r;
-       }
-
        migration_cache = KMEM_CACHE(dm_cache_migration, 0);
        if (!migration_cache) {
                dm_unregister_target(&cache_target);
                return -ENOMEM;
        }
 
+       r = dm_register_target(&cache_target);
+       if (r) {
+               DMERR("cache target registration failed: %d", r);
+               return r;
+       }
+
        return 0;
 }
 
index c8faa2b8584268f75a8177f39677b94edba55289..f7810cc869ac883e11b60e0ad3f29253444d411a 100644 (file)
@@ -457,6 +457,38 @@ do {                                                                       \
                 dm_noflush_suspending((m)->ti));                       \
 } while (0)
 
+/*
+ * Check whether bios must be queued in the device-mapper core rather
+ * than here in the target.
+ *
+ * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold
+ * the same value then we are not between multipath_presuspend()
+ * and multipath_resume() calls and we have no need to check
+ * for the DMF_NOFLUSH_SUSPENDING flag.
+ */
+static bool __must_push_back(struct multipath *m, unsigned long flags)
+{
+       return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) !=
+                test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) &&
+               dm_noflush_suspending(m->ti));
+}
+
+/*
+ * The following functions use READ_ONCE() to get atomic access to
+ * all of m->flags without taking the spinlock.
+ */
+static bool must_push_back_rq(struct multipath *m)
+{
+       unsigned long flags = READ_ONCE(m->flags);
+       return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags);
+}
+
+static bool must_push_back_bio(struct multipath *m)
+{
+       unsigned long flags = READ_ONCE(m->flags);
+       return __must_push_back(m, flags);
+}
+
 /*
  * Map cloned requests (request-based multipath)
  */
@@ -478,7 +510,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
                pgpath = choose_pgpath(m, nr_bytes);
 
        if (!pgpath) {
-               if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+               if (must_push_back_rq(m))
                        return DM_MAPIO_DELAY_REQUEUE;
                dm_report_EIO(m);       /* Failed */
                return DM_MAPIO_KILL;
@@ -553,7 +585,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
        }
 
        if (!pgpath) {
-               if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+               if (must_push_back_bio(m))
                        return DM_MAPIO_REQUEUE;
                dm_report_EIO(m);
                return DM_MAPIO_KILL;
@@ -651,8 +683,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
        assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags,
                   (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) ||
                   (!save_old_value && queue_if_no_path));
-       assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags,
-                  queue_if_no_path || dm_noflush_suspending(m->ti));
+       assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
        spin_unlock_irqrestore(&m->lock, flags);
 
        if (!queue_if_no_path) {
@@ -1486,7 +1517,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
                        fail_path(pgpath);
 
                if (atomic_read(&m->nr_valid_paths) == 0 &&
-                   !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                   !must_push_back_rq(m)) {
                        if (error == BLK_STS_IOERR)
                                dm_report_EIO(m);
                        /* complete with the original error */
@@ -1521,8 +1552,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
 
        if (atomic_read(&m->nr_valid_paths) == 0 &&
            !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-               dm_report_EIO(m);
-               *error = BLK_STS_IOERR;
+               if (must_push_back_bio(m)) {
+                       r = DM_ENDIO_REQUEUE;
+               } else {
+                       dm_report_EIO(m);
+                       *error = BLK_STS_IOERR;
+               }
                goto done;
        }
 
@@ -1957,13 +1992,6 @@ static int __init dm_multipath_init(void)
 {
        int r;
 
-       r = dm_register_target(&multipath_target);
-       if (r < 0) {
-               DMERR("request-based register failed %d", r);
-               r = -EINVAL;
-               goto bad_register_target;
-       }
-
        kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
        if (!kmultipathd) {
                DMERR("failed to create workqueue kmpathd");
@@ -1985,13 +2013,20 @@ static int __init dm_multipath_init(void)
                goto bad_alloc_kmpath_handlerd;
        }
 
+       r = dm_register_target(&multipath_target);
+       if (r < 0) {
+               DMERR("request-based register failed %d", r);
+               r = -EINVAL;
+               goto bad_register_target;
+       }
+
        return 0;
 
+bad_register_target:
+       destroy_workqueue(kmpath_handlerd);
 bad_alloc_kmpath_handlerd:
        destroy_workqueue(kmultipathd);
 bad_alloc_kmultipathd:
-       dm_unregister_target(&multipath_target);
-bad_register_target:
        return r;
 }
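
must_push_back_rq() and must_push_back_bio() above take one READ_ONCE() snapshot of m->flags and test both bits against that snapshot, so the decision is consistent without holding the spinlock. A userspace approximation of the snapshot pattern (the dm_noflush_suspending() part is left out, and READ_ONCE is modelled as a volatile load):

#include <stdio.h>

#define QUEUE_IF_NO_PATH        (1ul << 0)
#define SAVED_QUEUE_IF_NO_PATH  (1ul << 1)

/* Read the word exactly once, like the kernel's READ_ONCE(). */
#define READ_ONCE_UL(x) (*(volatile unsigned long *)&(x))

struct mpath {
        unsigned long flags;
};

/* Both bit tests use the same snapshot, so a concurrent update cannot
 * make them disagree halfway through the decision. */
static int must_push_back(struct mpath *m)
{
        unsigned long flags = READ_ONCE_UL(m->flags);
        int queue = !!(flags & QUEUE_IF_NO_PATH);
        int saved = !!(flags & SAVED_QUEUE_IF_NO_PATH);

        return queue || (queue != saved);
}

int main(void)
{
        struct mpath m = { .flags = SAVED_QUEUE_IF_NO_PATH };

        printf("push back: %d\n", must_push_back(&m));
        return 0;
}
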
 
index 1113b42e1edae4029f550b71c635ea80c76a46b9..a0613bd8ed00efc17d545a3335c39cb6cfb83919 100644 (file)
@@ -2411,24 +2411,6 @@ static int __init dm_snapshot_init(void)
                return r;
        }
 
-       r = dm_register_target(&snapshot_target);
-       if (r < 0) {
-               DMERR("snapshot target register failed %d", r);
-               goto bad_register_snapshot_target;
-       }
-
-       r = dm_register_target(&origin_target);
-       if (r < 0) {
-               DMERR("Origin target register failed %d", r);
-               goto bad_register_origin_target;
-       }
-
-       r = dm_register_target(&merge_target);
-       if (r < 0) {
-               DMERR("Merge target register failed %d", r);
-               goto bad_register_merge_target;
-       }
-
        r = init_origin_hash();
        if (r) {
                DMERR("init_origin_hash failed.");
@@ -2449,19 +2431,37 @@ static int __init dm_snapshot_init(void)
                goto bad_pending_cache;
        }
 
+       r = dm_register_target(&snapshot_target);
+       if (r < 0) {
+               DMERR("snapshot target register failed %d", r);
+               goto bad_register_snapshot_target;
+       }
+
+       r = dm_register_target(&origin_target);
+       if (r < 0) {
+               DMERR("Origin target register failed %d", r);
+               goto bad_register_origin_target;
+       }
+
+       r = dm_register_target(&merge_target);
+       if (r < 0) {
+               DMERR("Merge target register failed %d", r);
+               goto bad_register_merge_target;
+       }
+
        return 0;
 
-bad_pending_cache:
-       kmem_cache_destroy(exception_cache);
-bad_exception_cache:
-       exit_origin_hash();
-bad_origin_hash:
-       dm_unregister_target(&merge_target);
 bad_register_merge_target:
        dm_unregister_target(&origin_target);
 bad_register_origin_target:
        dm_unregister_target(&snapshot_target);
 bad_register_snapshot_target:
+       kmem_cache_destroy(pending_cache);
+bad_pending_cache:
+       kmem_cache_destroy(exception_cache);
+bad_exception_cache:
+       exit_origin_hash();
+bad_origin_hash:
        dm_exception_store_exit();
 
        return r;
index 88130b5d95f909ead8441dec7f3fb5d80a7914c7..aaffd0c0ee9a76c71f23f9bb1074ec8057b8c6f7 100644 (file)
@@ -453,14 +453,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 
                refcount_set(&dd->count, 1);
                list_add(&dd->list, &t->devices);
+               goto out;
 
        } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
                r = upgrade_mode(dd, mode, t->md);
                if (r)
                        return r;
-               refcount_inc(&dd->count);
        }
-
+       refcount_inc(&dd->count);
+out:
        *result = dd->dm_dev;
        return 0;
 }
index 89e5dff9b4cfc1b87049529238c5c01978345b81..f91d771fff4b6e9d9a488a7a67916326a1e85897 100644 (file)
@@ -4355,30 +4355,28 @@ static struct target_type thin_target = {
 
 static int __init dm_thin_init(void)
 {
-       int r;
+       int r = -ENOMEM;
 
        pool_table_init();
 
+       _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
+       if (!_new_mapping_cache)
+               return r;
+
        r = dm_register_target(&thin_target);
        if (r)
-               return r;
+               goto bad_new_mapping_cache;
 
        r = dm_register_target(&pool_target);
        if (r)
-               goto bad_pool_target;
-
-       r = -ENOMEM;
-
-       _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
-       if (!_new_mapping_cache)
-               goto bad_new_mapping_cache;
+               goto bad_thin_target;
 
        return 0;
 
-bad_new_mapping_cache:
-       dm_unregister_target(&pool_target);
-bad_pool_target:
+bad_thin_target:
        dm_unregister_target(&thin_target);
+bad_new_mapping_cache:
+       kmem_cache_destroy(_new_mapping_cache);
 
        return r;
 }
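
dm-cache, dm-mpath, dm-snapshot and dm-thin all receive the same reordering: allocate caches and workqueues first, register the targets last, and unwind in strictly reverse order on failure, so a registered target is never visible while its resources are missing. A compact standalone sketch of that ordering with stub resources:

#include <stdio.h>
#include <stdlib.h>

static void *cache;                     /* stand-ins for real resources */
static int registered;

static int alloc_cache(void)    { cache = malloc(64); return cache ? 0 : -1; }
static void free_cache(void)    { free(cache); cache = NULL; }
static int register_tgt(void)   { registered = 1; return 0; }
static void unregister_tgt(void) { registered = 0; }

static int init_example(void)
{
        int r;

        r = alloc_cache();              /* resources first */
        if (r)
                return r;

        r = register_tgt();             /* make it visible last */
        if (r)
                goto bad_register;

        return 0;

bad_register:                           /* unwind in reverse order */
        free_cache();
        return r;
}

int main(void)
{
        printf("init: %d\n", init_example());
        unregister_tgt();
        free_cache();
        return 0;
}
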
index c9714072e22465d4b23d8101038f782b084b2dca..59c82cdcf48d8a508613dbc7b1c98654285de28f 100644 (file)
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        u8 *ptr;
        u8 *rx_buf;
        u8 sum;
+       u8 rx_byte;
        int ret = 0, final_ret;
 
        len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_packet(ec_dev,
-                                       ec_msg->insize + sizeof(*response));
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_packet(ec_dev,
+                               ec_msg->insize + sizeof(*response));
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        int i, len;
        u8 *ptr;
        u8 *rx_buf;
+       u8 rx_byte;
        int sum;
        int ret = 0, final_ret;
 
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_response(ec_dev,
-                                       ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_response(ec_dev,
+                               ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
                           sizeof(struct ec_response_get_protocol_info);
        ec_dev->dout_size = sizeof(struct ec_host_request);
 
+       ec_spi->last_transfer_ns = ktime_get_ns();
 
        err = cros_ec_register(ec_dev);
        if (err) {
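
Both cros_ec_spi hunks replace a switch that fell through into its default label with explicit comparisons, stopping at the first sentinel byte and reporting -EREMOTEIO instead of retrying with -EAGAIN. A standalone sketch of the explicit scan; the sentinel values here are invented, not the real EC_SPI_* codes:

#include <stdio.h>

#define BYTE_PAST_END   0xfe
#define BYTE_BAD_DATA   0xfd
#define BYTE_NOT_READY  0xfc

/* Return a negative error as soon as a sentinel byte is seen. */
static int check_rx(const unsigned char *buf, unsigned int len)
{
        unsigned int i;

        for (i = 0; i < len; i++) {
                unsigned char b = buf[i];

                if (b == BYTE_PAST_END || b == BYTE_BAD_DATA ||
                    b == BYTE_NOT_READY)
                        return -5;      /* -EREMOTEIO in the driver */
        }
        return 0;
}

int main(void)
{
        unsigned char good[] = { 1, 2, 3 };
        unsigned char bad[]  = { 1, BYTE_NOT_READY, 3 };

        printf("%d %d\n", check_rx(good, 3), check_rx(bad, 3));
        return 0;
}
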
index da16bf45fab43ee9a946beef340f4cd2a224156e..dc94ffc6321a84dd25ce08d0f1a9374d40d4cead 100644 (file)
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
 EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
 
 static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
-                             struct device_node *node)
+                             struct device_node *parent)
 {
+       struct device_node *node;
+
        if (pdata && pdata->codec)
                return true;
 
-       if (of_find_node_by_name(node, "codec"))
+       node = of_get_child_by_name(parent, "codec");
+       if (node) {
+               of_node_put(node);
                return true;
+       }
 
        return false;
 }
index d66502d36ba0b3202d1c15c08540fa8aade42a32..dd19f17a1b637543965dd94e64d0d44b9178f64c 100644 (file)
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
 };
 
 
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
 {
-#ifdef CONFIG_OF
-       if (of_find_node_by_name(node, "vibra"))
+       struct device_node *node;
+
+       node = of_get_child_by_name(parent, "vibra");
+       if (node) {
+               of_node_put(node);
                return true;
-#endif
+       }
+
        return false;
 }
 
index 305a7a464d091614978e37b26a0573f66a27e8b8..4d63ac8a82e0022f10f424b4bab4dc85820d0981 100644 (file)
@@ -562,7 +562,7 @@ static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
 static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 {
        struct at24_data *at24 = priv;
-       struct i2c_client *client;
+       struct device *dev = &at24->client[0]->dev;
        char *buf = val;
        int ret;
 
@@ -572,11 +572,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
        if (off + count > at24->chip.byte_len)
                return -EINVAL;
 
-       client = at24_translate_offset(at24, &off);
-
-       ret = pm_runtime_get_sync(&client->dev);
+       ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(&client->dev);
+               pm_runtime_put_noidle(dev);
                return ret;
        }
 
@@ -592,7 +590,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
                status = at24->read_func(at24, buf, off, count);
                if (status < 0) {
                        mutex_unlock(&at24->lock);
-                       pm_runtime_put(&client->dev);
+                       pm_runtime_put(dev);
                        return status;
                }
                buf += status;
@@ -602,7 +600,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 
        mutex_unlock(&at24->lock);
 
-       pm_runtime_put(&client->dev);
+       pm_runtime_put(dev);
 
        return 0;
 }
@@ -610,7 +608,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 {
        struct at24_data *at24 = priv;
-       struct i2c_client *client;
+       struct device *dev = &at24->client[0]->dev;
        char *buf = val;
        int ret;
 
@@ -620,11 +618,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
        if (off + count > at24->chip.byte_len)
                return -EINVAL;
 
-       client = at24_translate_offset(at24, &off);
-
-       ret = pm_runtime_get_sync(&client->dev);
+       ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
-               pm_runtime_put_noidle(&client->dev);
+               pm_runtime_put_noidle(dev);
                return ret;
        }
 
@@ -640,7 +636,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
                status = at24->write_func(at24, buf, off, count);
                if (status < 0) {
                        mutex_unlock(&at24->lock);
-                       pm_runtime_put(&client->dev);
+                       pm_runtime_put(dev);
                        return status;
                }
                buf += status;
@@ -650,7 +646,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 
        mutex_unlock(&at24->lock);
 
-       pm_runtime_put(&client->dev);
+       pm_runtime_put(dev);
 
        return 0;
 }
@@ -880,7 +876,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
        at24->nvmem_config.reg_read = at24_read;
        at24->nvmem_config.reg_write = at24_write;
        at24->nvmem_config.priv = at24;
-       at24->nvmem_config.stride = 4;
+       at24->nvmem_config.stride = 1;
        at24->nvmem_config.word_size = 1;
        at24->nvmem_config.size = chip.byte_len;
 
index eda38cbe85307edd67863122e24451d269d66866..41f2a9f6851d9e74a58fd06030cb2f00517ea8d0 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/pci.h>
 #include <linux/mutex.h>
 #include <linux/miscdevice.h>
-#include <linux/pti.h>
+#include <linux/intel-pti.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
index f06cd91964ce970b0c623dd662c335eb0fc4dadb..79a5b985ccf5ee8fe5ba06b5aec717f36799794e 100644 (file)
@@ -75,9 +75,11 @@ struct mmc_fixup {
 #define EXT_CSD_REV_ANY (-1u)
 
 #define CID_MANFID_SANDISK      0x2
+#define CID_MANFID_ATP          0x9
 #define CID_MANFID_TOSHIBA      0x11
 #define CID_MANFID_MICRON       0x13
 #define CID_MANFID_SAMSUNG      0x15
+#define CID_MANFID_APACER       0x27
 #define CID_MANFID_KINGSTON     0x70
 #define CID_MANFID_HYNIX       0x90
 
index d209fb466979015d3d668beab293e60b9657c0fc..208a762b87ef2876914d641b38fc61858429a22e 100644 (file)
@@ -1290,7 +1290,7 @@ out_err:
 
 static void mmc_select_driver_type(struct mmc_card *card)
 {
-       int card_drv_type, drive_strength, drv_type;
+       int card_drv_type, drive_strength, drv_type = 0;
        int fixed_drv_type = card->host->fixed_drv_type;
 
        card_drv_type = card->ext_csd.raw_driver_strength |
index f664e9cbc9f8b66aa9f4f3c8fdd55882d99c8c79..75d317623852dc9f55586e41a176311a48144e1d 100644 (file)
@@ -52,6 +52,14 @@ static const struct mmc_fixup mmc_blk_fixups[] = {
        MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
                  MMC_QUIRK_BLK_NO_CMD23),
 
+       /*
+        * Some SD cards lock up while using CMD23 multiblock transfers.
+        */
+       MMC_FIXUP("AF SD", CID_MANFID_ATP, CID_OEMID_ANY, add_quirk_sd,
+                 MMC_QUIRK_BLK_NO_CMD23),
+       MMC_FIXUP("APUSD", CID_MANFID_APACER, 0x5048, add_quirk_sd,
+                 MMC_QUIRK_BLK_NO_CMD23),
+
        /*
         * Some MMC cards need longer data read timeout than indicated in CSD.
         */
index f80e911b8843819db8dcd1956c76ce2bf60b5ab8..73b6055774474e322b07cda4144c48b5b235a55c 100644 (file)
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
        if (!ops->oobbuf)
                ops->ooblen = 0;
 
-       if (offs < 0 || offs + ops->len >= mtd->size)
+       if (offs < 0 || offs + ops->len > mtd->size)
                return -EINVAL;
 
        if (ops->ooblen) {
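
The one-character change above matters because offs + ops->len is the first byte past the access: a request that ends exactly at mtd->size is legal, and only a request that goes beyond it should fail. The check in isolation:

#include <stdio.h>
#include <stdint.h>

/* Accept [offs, offs + len) only if it lies within [0, size). */
static int range_ok(int64_t offs, uint64_t len, uint64_t size)
{
        return offs >= 0 && (uint64_t)offs + len <= size;
}

int main(void)
{
        /* Reading the last byte of a 4096-byte device must be allowed. */
        printf("%d %d\n", range_ok(4095, 1, 4096), range_ok(4095, 2, 4096));
        return 0;
}
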
index e0eb51d8c0129937b35157ccdc107e5ef54c038a..dd56a671ea4285af0f5079bc652ecf4a32410272 100644 (file)
@@ -1763,7 +1763,7 @@ try_dmaread:
                        err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
                                                              addr);
                        /* erased page bitflips corrected */
-                       if (err > 0)
+                       if (err >= 0)
                                return err;
                }
 
index 484f7fbc3f7d2d11cd66fc3416e64ab38d47f852..a8bde6665c24f7e20e6103959ceee16c5d3ec5c8 100644 (file)
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
                goto out_ce;
        }
 
-       gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
-       if (IS_ERR(gpiomtd->nwp)) {
-               ret = PTR_ERR(gpiomtd->nwp);
+       gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
+       if (IS_ERR(gpiomtd->ale)) {
+               ret = PTR_ERR(gpiomtd->ale);
                goto out_ce;
        }
 
index 50f8d4a1b9832326070045d0c294d22393001fbd..d4d824ef64e9fb395af3bc549daae72b96731e16 100644 (file)
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                return ret;
        }
 
-       /* handle the block mark swapping */
-       block_mark_swapping(this, payload_virt, auxiliary_virt);
-
        /* Loop over status bytes, accumulating ECC status. */
        status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                max_bitflips = max_t(unsigned int, max_bitflips, *status);
        }
 
+       /* handle the block mark swapping */
+       block_mark_swapping(this, buf, auxiliary_virt);
+
        if (oob_required) {
                /*
                 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
index a7801f6668a5d0e394fd01078bb10c50998cdf56..6315774d72b3304d5ccd1729cb150c183bbac19e 100644 (file)
@@ -338,6 +338,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
                cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX;
                break;
        case PHY_INTERFACE_MODE_XGMII:
+       case PHY_INTERFACE_MODE_XAUI:
                cmode = MV88E6XXX_PORT_STS_CMODE_XAUI;
                break;
        case PHY_INTERFACE_MODE_RXAUI:
index 57e796870595bb9a305a7579154b0dbd1cbeec60..105fdb958cefb1d28e2f57a46da522e32536c3c7 100644 (file)
@@ -50,7 +50,7 @@
 #define AQ_CFG_PCI_FUNC_MSIX_IRQS   9U
 #define AQ_CFG_PCI_FUNC_PORTS       2U
 
-#define AQ_CFG_SERVICE_TIMER_INTERVAL    (2 * HZ)
+#define AQ_CFG_SERVICE_TIMER_INTERVAL    (1 * HZ)
 #define AQ_CFG_POLLING_TIMER_INTERVAL   ((unsigned int)(2 * HZ))
 
 #define AQ_CFG_SKB_FRAGS_MAX   32U
@@ -80,6 +80,7 @@
 #define AQ_CFG_DRV_VERSION     __stringify(NIC_MAJOR_DRIVER_VERSION)"."\
                                __stringify(NIC_MINOR_DRIVER_VERSION)"."\
                                __stringify(NIC_BUILD_DRIVER_VERSION)"."\
-                               __stringify(NIC_REVISION_DRIVER_VERSION)
+                               __stringify(NIC_REVISION_DRIVER_VERSION) \
+                               AQ_CFG_DRV_VERSION_SUFFIX
 
 #endif /* AQ_CFG_H */
index 70efb7467bf3a1c6f298e9d5697b38307e91dab7..f2d8063a2cefd8f7581f0e2182b81b1ce773a92a 100644 (file)
@@ -66,14 +66,14 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = {
        "OutUCast",
        "OutMCast",
        "OutBCast",
-       "InUCastOctects",
-       "OutUCastOctects",
-       "InMCastOctects",
-       "OutMCastOctects",
-       "InBCastOctects",
-       "OutBCastOctects",
-       "InOctects",
-       "OutOctects",
+       "InUCastOctets",
+       "OutUCastOctets",
+       "InMCastOctets",
+       "OutMCastOctets",
+       "InBCastOctets",
+       "OutBCastOctets",
+       "InOctets",
+       "OutOctets",
        "InPacketsDma",
        "OutPacketsDma",
        "InOctetsDma",
index 0207927dc8a6ab4ac76c46fb17669b7e50e7ae1e..b3825de6cdfb03b7f176e4b5a4cee00a20306982 100644 (file)
@@ -46,6 +46,28 @@ struct aq_hw_link_status_s {
        unsigned int mbps;
 };
 
+struct aq_stats_s {
+       u64 uprc;
+       u64 mprc;
+       u64 bprc;
+       u64 erpt;
+       u64 uptc;
+       u64 mptc;
+       u64 bptc;
+       u64 erpr;
+       u64 mbtc;
+       u64 bbtc;
+       u64 mbrc;
+       u64 bbrc;
+       u64 ubrc;
+       u64 ubtc;
+       u64 dpc;
+       u64 dma_pkt_rc;
+       u64 dma_pkt_tc;
+       u64 dma_oct_rc;
+       u64 dma_oct_tc;
+};
+
 #define AQ_HW_IRQ_INVALID 0U
 #define AQ_HW_IRQ_LEGACY  1U
 #define AQ_HW_IRQ_MSI     2U
@@ -85,7 +107,9 @@ struct aq_hw_ops {
        void (*destroy)(struct aq_hw_s *self);
 
        int (*get_hw_caps)(struct aq_hw_s *self,
-                          struct aq_hw_caps_s *aq_hw_caps);
+                          struct aq_hw_caps_s *aq_hw_caps,
+                          unsigned short device,
+                          unsigned short subsystem_device);
 
        int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
                               unsigned int frags);
@@ -164,8 +188,7 @@ struct aq_hw_ops {
 
        int (*hw_update_stats)(struct aq_hw_s *self);
 
-       int (*hw_get_hw_stats)(struct aq_hw_s *self, u64 *data,
-                              unsigned int *p_count);
+       struct aq_stats_s *(*hw_get_hw_stats)(struct aq_hw_s *self);
 
        int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
index 78dfb2ab78cefa0586168a0d043348c3488f0f20..75a894a9251c2114e7d30d40ab795f89ded413fe 100644 (file)
@@ -37,6 +37,8 @@ static unsigned int aq_itr_rx;
 module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644);
 MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate");
 
+static void aq_nic_update_ndev_stats(struct aq_nic_s *self);
+
 static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
 {
        struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
@@ -166,11 +168,8 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 static void aq_nic_service_timer_cb(struct timer_list *t)
 {
        struct aq_nic_s *self = from_timer(self, t, service_timer);
-       struct net_device *ndev = aq_nic_get_ndev(self);
+       int ctimer = AQ_CFG_SERVICE_TIMER_INTERVAL;
        int err = 0;
-       unsigned int i = 0U;
-       struct aq_ring_stats_rx_s stats_rx;
-       struct aq_ring_stats_tx_s stats_tx;
 
        if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY))
                goto err_exit;
@@ -182,23 +181,14 @@ static void aq_nic_service_timer_cb(struct timer_list *t)
        if (self->aq_hw_ops.hw_update_stats)
                self->aq_hw_ops.hw_update_stats(self->aq_hw);
 
-       memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
-       memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
-       for (i = AQ_DIMOF(self->aq_vec); i--;) {
-               if (self->aq_vec[i])
-                       aq_vec_add_stats(self->aq_vec[i], &stats_rx, &stats_tx);
-       }
+       aq_nic_update_ndev_stats(self);
 
-       ndev->stats.rx_packets = stats_rx.packets;
-       ndev->stats.rx_bytes = stats_rx.bytes;
-       ndev->stats.rx_errors = stats_rx.errors;
-       ndev->stats.tx_packets = stats_tx.packets;
-       ndev->stats.tx_bytes = stats_tx.bytes;
-       ndev->stats.tx_errors = stats_tx.errors;
+       /* If no link - use faster timer rate to detect link up asap */
+       if (!netif_carrier_ok(self->ndev))
+               ctimer = max(ctimer / 2, 1);
 
 err_exit:
-       mod_timer(&self->service_timer,
-                 jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL);
+       mod_timer(&self->service_timer, jiffies + ctimer);
 }
 
 static void aq_nic_polling_timer_cb(struct timer_list *t)
@@ -222,7 +212,7 @@ static struct net_device *aq_nic_ndev_alloc(void)
 
 struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
                                   const struct ethtool_ops *et_ops,
-                                  struct device *dev,
+                                  struct pci_dev *pdev,
                                   struct aq_pci_func_s *aq_pci_func,
                                   unsigned int port,
                                   const struct aq_hw_ops *aq_hw_ops)
@@ -242,7 +232,7 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
        ndev->netdev_ops = ndev_ops;
        ndev->ethtool_ops = et_ops;
 
-       SET_NETDEV_DEV(ndev, dev);
+       SET_NETDEV_DEV(ndev, &pdev->dev);
 
        ndev->if_port = port;
        self->ndev = ndev;
@@ -254,7 +244,8 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
 
        self->aq_hw = self->aq_hw_ops.create(aq_pci_func, self->port,
                                                &self->aq_hw_ops);
-       err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps);
+       err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps,
+                                         pdev->device, pdev->subsystem_device);
        if (err < 0)
                goto err_exit;
 
@@ -749,16 +740,40 @@ int aq_nic_get_regs_count(struct aq_nic_s *self)
 
 void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 {
-       struct aq_vec_s *aq_vec = NULL;
        unsigned int i = 0U;
        unsigned int count = 0U;
-       int err = 0;
+       struct aq_vec_s *aq_vec = NULL;
+       struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw);
 
-       err = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw, data, &count);
-       if (err < 0)
+       if (!stats)
                goto err_exit;
 
-       data += count;
+       data[i] = stats->uprc + stats->mprc + stats->bprc;
+       data[++i] = stats->uprc;
+       data[++i] = stats->mprc;
+       data[++i] = stats->bprc;
+       data[++i] = stats->erpt;
+       data[++i] = stats->uptc + stats->mptc + stats->bptc;
+       data[++i] = stats->uptc;
+       data[++i] = stats->mptc;
+       data[++i] = stats->bptc;
+       data[++i] = stats->ubrc;
+       data[++i] = stats->ubtc;
+       data[++i] = stats->mbrc;
+       data[++i] = stats->mbtc;
+       data[++i] = stats->bbrc;
+       data[++i] = stats->bbtc;
+       data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
+       data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
+       data[++i] = stats->dma_pkt_rc;
+       data[++i] = stats->dma_pkt_tc;
+       data[++i] = stats->dma_oct_rc;
+       data[++i] = stats->dma_oct_tc;
+       data[++i] = stats->dpc;
+
+       i++;
+
+       data += i;
        count = 0U;
 
        for (i = 0U, aq_vec = self->aq_vec[0];
@@ -768,7 +783,20 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
        }
 
 err_exit:;
-       (void)err;
+}
+
+static void aq_nic_update_ndev_stats(struct aq_nic_s *self)
+{
+       struct net_device *ndev = self->ndev;
+       struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw);
+
+       ndev->stats.rx_packets = stats->uprc + stats->mprc + stats->bprc;
+       ndev->stats.rx_bytes = stats->ubrc + stats->mbrc + stats->bbrc;
+       ndev->stats.rx_errors = stats->erpr;
+       ndev->stats.tx_packets = stats->uptc + stats->mptc + stats->bptc;
+       ndev->stats.tx_bytes = stats->ubtc + stats->mbtc + stats->bbtc;
+       ndev->stats.tx_errors = stats->erpt;
+       ndev->stats.multicast = stats->mprc;
 }
 
 void aq_nic_get_link_ksettings(struct aq_nic_s *self,
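
aq_nic_get_stats() now fills the ethtool array straight from the aq_stats_s snapshot, in the same fixed order as the string table, then advances data past the hardware block before the per-ring counters are appended. A simplified standalone version of the fill pattern using a few of the same counter names:

#include <stdio.h>

struct hw_stats {
        unsigned long uprc, mprc, bprc; /* rx unicast/multicast/broadcast */
        unsigned long uptc, mptc, bptc; /* tx unicast/multicast/broadcast */
};

/* Fill an ethtool-style array in a fixed order; returns slots written. */
static unsigned int fill_stats(const struct hw_stats *s, unsigned long *data)
{
        unsigned int i = 0;

        data[i]   = s->uprc + s->mprc + s->bprc;        /* total rx */
        data[++i] = s->uprc;
        data[++i] = s->mprc;
        data[++i] = s->bprc;
        data[++i] = s->uptc + s->mptc + s->bptc;        /* total tx */
        data[++i] = s->uptc;

        return ++i;     /* caller advances its pointer by this count */
}

int main(void)
{
        struct hw_stats s = { 1, 2, 3, 4, 5, 6 };
        unsigned long data[16];
        unsigned int n = fill_stats(&s, data);

        printf("%u entries, first=%lu\n", n, data[0]);
        return 0;
}
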
index 4309983acdd6f7502fa05869f79336fd459dc2fe..3c9f8db03d5f2a576c83064b18e930d5af6b7e1c 100644 (file)
@@ -71,7 +71,7 @@ struct aq_nic_cfg_s {
 
 struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
                                   const struct ethtool_ops *et_ops,
-                                  struct device *dev,
+                                  struct pci_dev *pdev,
                                   struct aq_pci_func_s *aq_pci_func,
                                   unsigned int port,
                                   const struct aq_hw_ops *aq_hw_ops);
index cadaa646c89f4b741382b4beee72c6ec3e3bfc18..58c29d04b186e634686ca667bc5afe9bd86e63a3 100644 (file)
@@ -51,7 +51,8 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
        pci_set_drvdata(pdev, self);
        self->pdev = pdev;
 
-       err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps);
+       err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps, pdev->device,
+                                    pdev->subsystem_device);
        if (err < 0)
                goto err_exit;
 
@@ -59,7 +60,7 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops,
 
        for (port = 0; port < self->ports; ++port) {
                struct aq_nic_s *aq_nic = aq_nic_alloc_cold(ndev_ops, eth_ops,
-                                                           &pdev->dev, self,
+                                                           pdev, self,
                                                            port, aq_hw_ops);
 
                if (!aq_nic) {
index 07b3c49a16a4266b4fb312bb79198f9ba0c60f04..f18dce14c93cfa89f5c091db5f5b06c6e882aa68 100644 (file)
 #include "hw_atl_a0_internal.h"
 
 static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self,
-                                struct aq_hw_caps_s *aq_hw_caps)
+                                struct aq_hw_caps_s *aq_hw_caps,
+                                unsigned short device,
+                                unsigned short subsystem_device)
 {
        memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps));
+
+       if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001)
+               aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G;
+
+       if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) {
+               aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G;
+               aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_5G;
+       }
+
        return 0;
 }
 
@@ -333,6 +344,10 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self,
        hw_atl_a0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
        hw_atl_a0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
 
+       /* Reset link status and read out initial hardware counters */
+       self->aq_link_status.mbps = 0;
+       hw_atl_utils_update_stats(self);
+
        err = aq_hw_err_from_flags(self);
        if (err < 0)
                goto err_exit;
index ec68c20efcbdb6079b9dba4b8200ad8f1f450233..e4a22ce7bf09d50f21ffed6425783d7be7c88da5 100644 (file)
 #include "hw_atl_utils.h"
 #include "hw_atl_llh.h"
 #include "hw_atl_b0_internal.h"
+#include "hw_atl_llh_internal.h"
 
 static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self,
-                                struct aq_hw_caps_s *aq_hw_caps)
+                                struct aq_hw_caps_s *aq_hw_caps,
+                                unsigned short device,
+                                unsigned short subsystem_device)
 {
        memcpy(aq_hw_caps, &hw_atl_b0_hw_caps_, sizeof(*aq_hw_caps));
+
+       if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001)
+               aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G;
+
+       if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) {
+               aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G;
+               aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_5G;
+       }
+
        return 0;
 }
 
@@ -357,6 +369,7 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self,
        };
 
        int err = 0;
+       u32 val;
 
        self->aq_nic_cfg = aq_nic_cfg;
 
@@ -374,6 +387,20 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self,
        hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
        hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
 
+       /* Force limit MRRS on RDM/TDM to 2K */
+       val = aq_hw_read_reg(self, pci_reg_control6_adr);
+       aq_hw_write_reg(self, pci_reg_control6_adr, (val & ~0x707) | 0x404);
+
+       /* TX DMA total request limit. B0 hardware is not able to
+        * handle more than (8K-MRRS) of incoming DMA data.
+        * The value 24 is in 256-byte units.
+        */
+       aq_hw_write_reg(self, tx_dma_total_req_limit_adr, 24);
+
+       /* Reset link status and read out initial hardware counters */
+       self->aq_link_status.mbps = 0;
+       hw_atl_utils_update_stats(self);
+
        err = aq_hw_err_from_flags(self);
        if (err < 0)
                goto err_exit;
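
The MRRS limit above is applied with a read-modify-write: clear the bit fields selected by 0x707, then OR in 0x404. The same masking step on a plain variable:

#include <stdio.h>
#include <stdint.h>

/* Clear the bits covered by 'mask', then OR in the new field values. */
static uint32_t rmw(uint32_t reg, uint32_t mask, uint32_t val)
{
        return (reg & ~mask) | val;
}

int main(void)
{
        uint32_t control6 = 0x00001237;         /* arbitrary current value */

        control6 = rmw(control6, 0x707, 0x404);
        printf("0x%08x\n", control6);
        return 0;
}
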
index 5527fc0e5942d6a8f4e14071a9e40a12e02ecf27..93450ec930e89f71c83253ac094e9928206df75f 100644 (file)
 #define tx_dma_desc_base_addrmsw_adr(descriptor) \
                        (0x00007c04u + (descriptor) * 0x40)
 
+/* tx dma total request limit */
+#define tx_dma_total_req_limit_adr 0x00007b20u
+
 /* tx interrupt moderation control register definitions
  * Preprocessor definitions for TX Interrupt Moderation Control Register
  * Base Address: 0x00008980
 /* default value of bitfield reg_res_dsbl */
 #define pci_reg_res_dsbl_default 0x1
 
+/* PCI core control register */
+#define pci_reg_control6_adr 0x1014u
+
 /* global microprocessor scratch pad definitions */
 #define glb_cpu_scratch_scp_adr(scratch_scp) (0x00000300u + (scratch_scp) * 0x4)
 
index 1fe016fc4bc704361ca68ee39f3e443715505e8c..f2ce12ed4218ee8d39a462b6ffc1fada96358506 100644 (file)
@@ -503,73 +503,43 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self)
        struct hw_atl_s *hw_self = PHAL_ATLANTIC;
        struct hw_aq_atl_utils_mbox mbox;
 
-       if (!self->aq_link_status.mbps)
-               return 0;
-
        hw_atl_utils_mpi_read_stats(self, &mbox);
 
 #define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \
                        mbox.stats._N_ - hw_self->last_stats._N_)
-
-       AQ_SDELTA(uprc);
-       AQ_SDELTA(mprc);
-       AQ_SDELTA(bprc);
-       AQ_SDELTA(erpt);
-
-       AQ_SDELTA(uptc);
-       AQ_SDELTA(mptc);
-       AQ_SDELTA(bptc);
-       AQ_SDELTA(erpr);
-
-       AQ_SDELTA(ubrc);
-       AQ_SDELTA(ubtc);
-       AQ_SDELTA(mbrc);
-       AQ_SDELTA(mbtc);
-       AQ_SDELTA(bbrc);
-       AQ_SDELTA(bbtc);
-       AQ_SDELTA(dpc);
-
+       if (self->aq_link_status.mbps) {
+               AQ_SDELTA(uprc);
+               AQ_SDELTA(mprc);
+               AQ_SDELTA(bprc);
+               AQ_SDELTA(erpt);
+
+               AQ_SDELTA(uptc);
+               AQ_SDELTA(mptc);
+               AQ_SDELTA(bptc);
+               AQ_SDELTA(erpr);
+
+               AQ_SDELTA(ubrc);
+               AQ_SDELTA(ubtc);
+               AQ_SDELTA(mbrc);
+               AQ_SDELTA(mbtc);
+               AQ_SDELTA(bbrc);
+               AQ_SDELTA(bbtc);
+               AQ_SDELTA(dpc);
+       }
 #undef AQ_SDELTA
+       hw_self->curr_stats.dma_pkt_rc = stats_rx_dma_good_pkt_counterlsw_get(self);
+       hw_self->curr_stats.dma_pkt_tc = stats_tx_dma_good_pkt_counterlsw_get(self);
+       hw_self->curr_stats.dma_oct_rc = stats_rx_dma_good_octet_counterlsw_get(self);
+       hw_self->curr_stats.dma_oct_tc = stats_tx_dma_good_octet_counterlsw_get(self);
 
        memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats));
 
        return 0;
 }
 
-int hw_atl_utils_get_hw_stats(struct aq_hw_s *self,
-                             u64 *data, unsigned int *p_count)
+struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self)
 {
-       struct hw_atl_s *hw_self = PHAL_ATLANTIC;
-       struct hw_atl_stats_s *stats = &hw_self->curr_stats;
-       int i = 0;
-
-       data[i] = stats->uprc + stats->mprc + stats->bprc;
-       data[++i] = stats->uprc;
-       data[++i] = stats->mprc;
-       data[++i] = stats->bprc;
-       data[++i] = stats->erpt;
-       data[++i] = stats->uptc + stats->mptc + stats->bptc;
-       data[++i] = stats->uptc;
-       data[++i] = stats->mptc;
-       data[++i] = stats->bptc;
-       data[++i] = stats->ubrc;
-       data[++i] = stats->ubtc;
-       data[++i] = stats->mbrc;
-       data[++i] = stats->mbtc;
-       data[++i] = stats->bbrc;
-       data[++i] = stats->bbtc;
-       data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
-       data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
-       data[++i] = stats_rx_dma_good_pkt_counterlsw_get(self);
-       data[++i] = stats_tx_dma_good_pkt_counterlsw_get(self);
-       data[++i] = stats_rx_dma_good_octet_counterlsw_get(self);
-       data[++i] = stats_tx_dma_good_octet_counterlsw_get(self);
-       data[++i] = stats->dpc;
-
-       if (p_count)
-               *p_count = ++i;
-
-       return 0;
+       return &PHAL_ATLANTIC->curr_stats;
 }
 
 static const u32 hw_atl_utils_hw_mac_regs[] = {
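
hw_atl_utils_update_stats() above folds the difference against the last firmware snapshot into the running totals (and only while the link is up), then remembers the snapshot for next time. One counter's worth of that delta accumulation, written out instead of the AQ_SDELTA macro:

#include <stdio.h>

struct counters {
        unsigned long uprc;     /* rx unicast packets, as one example */
};

static struct counters curr, last;

/* Fold the change since the previous readout into the running total;
 * unsigned arithmetic keeps this correct across counter wrap. */
static void accumulate(const struct counters *now)
{
        curr.uprc += now->uprc - last.uprc;
        last = *now;
}

int main(void)
{
        struct counters fw = { .uprc = 100 };

        accumulate(&fw);
        fw.uprc = 130;          /* next firmware readout */
        accumulate(&fw);
        printf("total rx unicast: %lu\n", curr.uprc);
        return 0;
}
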
index c99cc690e425bb72907df675e04a196819cfec02..21aeca6908d3b6dac5ec5ced1285c89cdba5acf2 100644 (file)
@@ -129,7 +129,7 @@ struct __packed hw_aq_atl_utils_mbox {
 struct __packed hw_atl_s {
        struct aq_hw_s base;
        struct hw_atl_stats_s last_stats;
-       struct hw_atl_stats_s curr_stats;
+       struct aq_stats_s curr_stats;
        u64 speed;
        unsigned int chip_features;
        u32 fw_ver_actual;
@@ -207,8 +207,6 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version);
 
 int hw_atl_utils_update_stats(struct aq_hw_s *self);
 
-int hw_atl_utils_get_hw_stats(struct aq_hw_s *self,
-                             u64 *data,
-                             unsigned int *p_count);
+struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
 
 #endif /* HW_ATL_UTILS_H */
index 0de858d215c22d036d6c120bafe996e12ada1287..9009f2651e706b66e50b998b95a12283cb3b0e9b 100644 (file)
 #define VER_H
 
 #define NIC_MAJOR_DRIVER_VERSION           1
-#define NIC_MINOR_DRIVER_VERSION           5
-#define NIC_BUILD_DRIVER_VERSION           345
+#define NIC_MINOR_DRIVER_VERSION           6
+#define NIC_BUILD_DRIVER_VERSION           13
 #define NIC_REVISION_DRIVER_VERSION        0
 
+#define AQ_CFG_DRV_VERSION_SUFFIX "-kern"
+
 #endif /* VER_H */
index 3c63b16d485f4bb3a7587e9e6d36dd0e121668d2..d9efbc8d783b84b128379e0ce58a43b005a8ab58 100644 (file)
@@ -159,6 +159,8 @@ struct arc_emac_priv {
        unsigned int link;
        unsigned int duplex;
        unsigned int speed;
+
+       unsigned int rx_missed_errors;
 };
 
 /**
index 3241af1ce7182824c09ee3ad774f122565f6c940..bd277b0dc615118a58b81dfba5b040e26fa667ba 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "emac.h"
 
+static void arc_emac_restart(struct net_device *ndev);
+
 /**
  * arc_emac_tx_avail - Return the number of available slots in the tx ring.
  * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
                        continue;
                }
 
-               pktlen = info & LEN_MASK;
-               stats->rx_packets++;
-               stats->rx_bytes += pktlen;
-               skb = rx_buff->skb;
-               skb_put(skb, pktlen);
-               skb->dev = ndev;
-               skb->protocol = eth_type_trans(skb, ndev);
-
-               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
-                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
-
-               /* Prepare the BD for next cycle */
-               rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
-                                                        EMAC_BUFFER_SIZE);
-               if (unlikely(!rx_buff->skb)) {
+               /* Prepare the BD for the next cycle. Call netif_receive_skb()
+                * only if a new skb was allocated and mapped, to avoid holes
+                * in the RX FIFO.
+                */
+               skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
+               if (unlikely(!skb)) {
+                       if (net_ratelimit())
+                               netdev_err(ndev, "cannot allocate skb\n");
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
-                       /* Because receive_skb is below, increment rx_dropped */
                        stats->rx_dropped++;
                        continue;
                }
 
-               /* receive_skb only if new skb was allocated to avoid holes */
-               netif_receive_skb(skb);
-
-               addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
+               addr = dma_map_single(&ndev->dev, (void *)skb->data,
                                      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
                if (dma_mapping_error(&ndev->dev, addr)) {
                        if (net_ratelimit())
-                               netdev_err(ndev, "cannot dma map\n");
-                       dev_kfree_skb(rx_buff->skb);
+                               netdev_err(ndev, "cannot map dma buffer\n");
+                       dev_kfree_skb(skb);
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
+                       stats->rx_dropped++;
                        continue;
                }
+
+               /* unmap previously mapped skb */
+               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+               pktlen = info & LEN_MASK;
+               stats->rx_packets++;
+               stats->rx_bytes += pktlen;
+               skb_put(rx_buff->skb, pktlen);
+               rx_buff->skb->dev = ndev;
+               rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
+
+               netif_receive_skb(rx_buff->skb);
+
+               rx_buff->skb = skb;
                dma_unmap_addr_set(rx_buff, addr, addr);
                dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
 
@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
        return work_done;
 }
 
+/**
+ * arc_emac_rx_miss_handle - handle R_MISS register
+ * @ndev:      Pointer to the net_device structure.
+ */
+static void arc_emac_rx_miss_handle(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       unsigned int miss;
+
+       miss = arc_reg_get(priv, R_MISS);
+       if (miss) {
+               stats->rx_errors += miss;
+               stats->rx_missed_errors += miss;
+               priv->rx_missed_errors += miss;
+       }
+}
+
+/**
+ * arc_emac_rx_stall_check - check RX stall
+ * @ndev:      Pointer to the net_device structure.
+ * @budget:    Maximum number of BDs to process in one call.
+ * @work_done: Number of BDs actually processed.
+ *
+ * Under certain conditions the EMAC stops receiving incoming packets and
+ * continuously increments the R_MISS register instead of saving data into
+ * the provided buffer. This function detects that condition and restarts
+ * the EMAC.
+ */
+static void arc_emac_rx_stall_check(struct net_device *ndev,
+                                   int budget, unsigned int work_done)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct arc_emac_bd *rxbd;
+
+       if (work_done)
+               priv->rx_missed_errors = 0;
+
+       if (priv->rx_missed_errors && budget) {
+               rxbd = &priv->rxbd[priv->last_rx_bd];
+               if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
+                       arc_emac_restart(ndev);
+                       priv->rx_missed_errors = 0;
+               }
+       }
+}
+
 /**
  * arc_emac_poll - NAPI poll handler.
  * @napi:      Pointer to napi_struct structure.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
        unsigned int work_done;
 
        arc_emac_tx_clean(ndev);
+       arc_emac_rx_miss_handle(ndev);
 
        work_done = arc_emac_rx(ndev, budget);
        if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
                arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
        }
 
+       arc_emac_rx_stall_check(ndev, budget, work_done);
+
        return work_done;
 }
 
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
                if (status & MSER_MASK) {
                        stats->rx_missed_errors += 0x100;
                        stats->rx_errors += 0x100;
+                       priv->rx_missed_errors += 0x100;
+                       napi_schedule(&priv->napi);
                }
 
                if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 
+/**
+ * arc_emac_restart - Restart EMAC
+ * @ndev:      Pointer to net_device structure.
+ *
+ * This function does a hardware reset of the EMAC in order to restore
+ * reception of network packets.
+ */
+static void arc_emac_restart(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       int i;
+
+       if (net_ratelimit())
+               netdev_warn(ndev, "restarting stalled EMAC\n");
+
+       netif_stop_queue(ndev);
+
+       /* Disable interrupts */
+       arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Disable EMAC */
+       arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+       /* Return the sk_buff to system */
+       arc_free_tx_queue(ndev);
+
+       /* Clean Tx BD's */
+       priv->txbd_curr = 0;
+       priv->txbd_dirty = 0;
+       memset(priv->txbd, 0, TX_RING_SZ);
+
+       for (i = 0; i < RX_BD_NUM; i++) {
+               struct arc_emac_bd *rxbd = &priv->rxbd[i];
+               unsigned int info = le32_to_cpu(rxbd->info);
+
+               if (!(info & FOR_EMAC)) {
+                       stats->rx_errors++;
+                       stats->rx_dropped++;
+               }
+               /* Return ownership to EMAC */
+               rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+       }
+       priv->last_rx_bd = 0;
+
+       /* Make sure info is visible to EMAC before enable */
+       wmb();
+
+       /* Enable interrupts */
+       arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Enable EMAC */
+       arc_reg_or(priv, R_CTRL, EN_MASK);
+
+       netif_start_queue(ndev);
+}
+
 static const struct net_device_ops arc_emac_netdev_ops = {
        .ndo_open               = arc_emac_open,
        .ndo_stop               = arc_emac_stop,
index c6163874e4e7e136fb953aff74eab4fff401cb75..16f9bee992fedfab2069a2324c38fd4a5f142c93 100644 (file)
@@ -199,9 +199,11 @@ static int emac_rockchip_probe(struct platform_device *pdev)
 
        /* RMII interface needs always a rate of 50MHz */
        err = clk_set_rate(priv->refclk, 50000000);
-       if (err)
+       if (err) {
                dev_err(dev,
                        "failed to change reference clock rate (%d)\n", err);
+               goto out_regulator_disable;
+       }
 
        if (priv->soc_data->need_div_macclk) {
                priv->macclk = devm_clk_get(dev, "macclk");
@@ -230,12 +232,14 @@ static int emac_rockchip_probe(struct platform_device *pdev)
        err = arc_emac_probe(ndev, interface);
        if (err) {
                dev_err(dev, "failed to probe arc emac (%d)\n", err);
-               goto out_regulator_disable;
+               goto out_clk_disable_macclk;
        }
 
        return 0;
+
 out_clk_disable_macclk:
-       clk_disable_unprepare(priv->macclk);
+       if (priv->soc_data->need_div_macclk)
+               clk_disable_unprepare(priv->macclk);
 out_regulator_disable:
        if (priv->regulator)
                regulator_disable(priv->regulator);
index de51c2177d03b3cf9e4653226bd8901b1d29834e..d09c5a9c53b502788224102a6ae789cc42cc9b25 100644 (file)
@@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
        /* Reset PHY, otherwise the read DMA engine will be in a mode that
         * breaks all requests to 256 bytes.
         */
-       if (tg3_asic_rev(tp) == ASIC_REV_57766)
+       if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+           tg3_asic_rev(tp) == ASIC_REV_5717 ||
+           tg3_asic_rev(tp) == ASIC_REV_5719)
                reset_phy = true;
 
        err = tg3_restart_hw(tp, reset_phy);
index bc93b69cfd1edcf62d11cd24d41a9ca74b8f0dcc..a539263cd79ce4be8fcc0cbfe6bfdd196336cd38 100644 (file)
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
        val &= ~MVNETA_GMAC0_PORT_ENABLE;
        mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
 
+       pp->link = 0;
+       pp->duplex = -1;
+       pp->speed = 0;
+
        udelay(200);
 }
 
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 
                if (!mvneta_rxq_desc_is_first_last(rx_status) ||
                    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+                       mvneta_rx_error(pp, rx_desc);
 err_drop_frame:
                        dev->stats.rx_errors++;
-                       mvneta_rx_error(pp, rx_desc);
                        /* leave the descriptor untouched */
                        continue;
                }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
        int queue;
 
-       for (queue = 0; queue < txq_number; queue++)
+       for (queue = 0; queue < rxq_number; queue++)
                mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
index 6e423f098a60d33f96678ef5c08b1b52cbe3287f..31efc47c847eaf555d3c54b7a754a83c5e4f8d9b 100644 (file)
@@ -4081,7 +4081,6 @@ static void skge_remove(struct pci_dev *pdev)
        if (hw->ports > 1) {
                skge_write32(hw, B0_IMSK, 0);
                skge_read32(hw, B0_IMSK);
-               free_irq(pdev->irq, hw);
        }
        spin_unlock_irq(&hw->hw_lock);
 
index 54adfd96785846f9e60a2ded11ab96bc0c196c7e..fc67e35b253e4e59c12227c3e24da9c0f5bae311 100644 (file)
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
        /* set GE2 TUNE */
        regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
 
-       /* GE1, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
-
-       /* GE2, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
+       /* Set link down as the default for each GMAC. Each GMAC's own MCR is
+        * set up with the appropriate value when mtk_phy_link_adjust() is
+        * invoked.
+        */
+       for (i = 0; i < MTK_MAC_COUNT; i++)
+               mtk_w32(eth, 0, MTK_MAC_MCR(i));
 
        /* Indicates CDM to parse the MTK special tag from CPU
         * which also is working out for untag packets.
index e0eb695318e64ebcaf58d6edb5f9a57be6f9ddf6..1fa4849a6f560f2c3e15dddc13c03bb59031a5b7 100644 (file)
@@ -188,7 +188,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        struct net_device *dev = mdev->pndev[port];
        struct mlx4_en_priv *priv = netdev_priv(dev);
        struct net_device_stats *stats = &dev->stats;
-       struct mlx4_cmd_mailbox *mailbox;
+       struct mlx4_cmd_mailbox *mailbox, *mailbox_priority;
        u64 in_mod = reset << 8 | port;
        int err;
        int i, counter_index;
@@ -198,6 +198,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
+
+       mailbox_priority = mlx4_alloc_cmd_mailbox(mdev->dev);
+       if (IS_ERR(mailbox_priority)) {
+               mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+               return PTR_ERR(mailbox_priority);
+       }
+
        err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
                           MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
                           MLX4_CMD_NATIVE);
@@ -206,6 +213,28 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 
        mlx4_en_stats = mailbox->buf;
 
+       memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
+       counter_index = mlx4_get_default_counter_index(mdev->dev, port);
+       err = mlx4_get_counter_stats(mdev->dev, counter_index,
+                                    &tmp_counter_stats, reset);
+
+       /* 0xffs indicates invalid value */
+       memset(mailbox_priority->buf, 0xff,
+              sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+
+       if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
+               memset(mailbox_priority->buf, 0,
+                      sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+               err = mlx4_cmd_box(mdev->dev, 0, mailbox_priority->dma,
+                                  in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
+                                  0, MLX4_CMD_DUMP_ETH_STATS,
+                                  MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+               if (err)
+                       goto out;
+       }
+
+       flowstats = mailbox_priority->buf;
+
        spin_lock_bh(&priv->stats_lock);
 
        mlx4_en_fold_software_stats(dev);
@@ -345,31 +374,6 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan);
        priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan);
 
-       spin_unlock_bh(&priv->stats_lock);
-
-       memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
-       counter_index = mlx4_get_default_counter_index(mdev->dev, port);
-       err = mlx4_get_counter_stats(mdev->dev, counter_index,
-                                    &tmp_counter_stats, reset);
-
-       /* 0xffs indicates invalid value */
-       memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
-
-       if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
-               memset(mailbox->buf, 0,
-                      sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
-               err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma,
-                                  in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
-                                  0, MLX4_CMD_DUMP_ETH_STATS,
-                                  MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
-               if (err)
-                       goto out;
-       }
-
-       flowstats = mailbox->buf;
-
-       spin_lock_bh(&priv->stats_lock);
-
        if (tmp_counter_stats.counter_mode == 0) {
                priv->pf_stats.rx_bytes   = be64_to_cpu(tmp_counter_stats.rx_bytes);
                priv->pf_stats.tx_bytes   = be64_to_cpu(tmp_counter_stats.tx_bytes);
@@ -410,6 +414,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 
 out:
        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+       mlx4_free_cmd_mailbox(mdev->dev, mailbox_priority);
        return err;
 }
 
index 88699b18194618b0f6fd71af120bf2848560787a..946d9db7c8c2028c61b03b7f3d2f5d7243280ab5 100644 (file)
@@ -185,7 +185,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf)
                if (priv->mdev->dev->caps.flags &
                                        MLX4_DEV_CAP_FLAG_UC_LOOPBACK) {
                        buf[3] = mlx4_en_test_registers(priv);
-                       if (priv->port_up)
+                       if (priv->port_up && dev->mtu >= MLX4_SELFTEST_LB_MIN_MTU)
                                buf[4] = mlx4_en_test_loopback(priv);
                }
 
index 1856e279a7e0a40b9365da2016a1b52e9156479b..2b72677eccd48f5a45aa6f0e44cc45258cf24762 100644 (file)
 #define SMALL_PACKET_SIZE      (256 - NET_IP_ALIGN)
 #define HEADER_COPY_SIZE       (128 - NET_IP_ALIGN)
 #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
+#define PREAMBLE_LEN           8
+#define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \
+                                 ETH_HLEN + PREAMBLE_LEN)
 
 #define MLX4_EN_MIN_MTU                46
 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
index 04304dd894c6c3119eb24302fe599766dadd3708..606a0e0beeae6961ae4e8c7a357d737834be614e 100644 (file)
@@ -611,7 +611,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
                                                MLX4_MAX_PORTS;
                                else
                                        res_alloc->guaranteed[t] = 0;
-                               res_alloc->res_free -= res_alloc->guaranteed[t];
                                break;
                        default:
                                break;
index 1fffdebbc9e8994c70a19f4982f26d1de98be5f2..e9a1fbcc4adfa6e692902b551d0c535bfe019a9a 100644 (file)
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
-       case MLX5_CMD_OP_SET_RATE_LIMIT:
+       case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
        case MLX5_CMD_OP_QUERY_RATE_LIMIT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
-       MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
        MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
index c0872b3284cb405583642d71a0e2e540d4804b6f..543060c305a073c0457cc31ae7318f425a0e7c49 100644 (file)
@@ -82,6 +82,9 @@
        max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+       (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ                  18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
        struct mlx5_core_dev      *mdev;
        struct hwtstamp_config    *tstamp;
        int                        ix;
+       int                        cpu;
 };
 
 struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params,
+                              u8 rq_type);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
index c6d90b6dd80efa9a1ee82958adf5ef4dd3b4522d..9bcf38f4123b504637c080413078c23304d9e49e 100644 (file)
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
                                    struct ieee_ets *ets)
 {
+       bool have_ets_tc = false;
        int bw_sum = 0;
        int i;
 
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
        }
 
        /* Validate Bandwidth Sum */
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+                       have_ets_tc = true;
                        bw_sum += ets->tc_tx_bw[i];
+               }
+       }
 
-       if (bw_sum != 0 && bw_sum != 100) {
+       if (have_ets_tc && bw_sum != 100) {
                netdev_err(netdev,
                           "Failed to validate ETS: BW sum is illegal\n");
                return -EINVAL;
index 23425f02840581f6be591bc48cf8cccc8cc26443..8f05efa5c829bccb67ddd8b24dc2997adfe4a6c8 100644 (file)
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
        new_channels.params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-       mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
-                                new_channels.params.rq_wq_type);
+       new_channels.params.mpwqe_log_stride_sz =
+               MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
+       new_channels.params.mpwqe_log_num_strides =
+               MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
                return err;
 
        mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+       mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+                 MLX5E_GET_PFLAG(&priv->channels.params,
+                                 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
        return 0;
 }
 
index d2b057a3e512c1144d741ccffd5bf47b5f138a01..d9d8227f195f0e151ba948e0622ea90a411817c4 100644 (file)
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
        struct mlx5e_cq_param      icosq_cq;
 };
 
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-       return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
        return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
                MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type)
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params, u8 rq_type)
 {
        params->rq_wq_type = rq_type;
        params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                params->log_rq_size = is_kdump_kernel() ?
                        MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
                        MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-               params->mpwqe_log_stride_sz =
-                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-                       MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
-                       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+               params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
+                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
                params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
                        params->mpwqe_log_stride_sz;
                break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
+                               struct mlx5e_params *params)
 {
        u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
                    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
                    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                    MLX5_WQ_TYPE_LINKED_LIST;
-       mlx5e_set_rq_type_params(mdev, params, rq_type);
+       mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
        int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        int mtt_sz = mlx5e_get_wqe_mtt_sz();
        int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-       int node = mlx5e_get_node(c->priv, c->ix);
        int i;
 
        rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-                                       GFP_KERNEL, node);
+                                     GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->mpwqe.info)
                goto err_out;
 
        /* We allocate more than mtt_sz as we will align the pointer */
-       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-                                       GFP_KERNEL, node);
+       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+                                       cpu_to_node(c->cpu));
        if (unlikely(!rq->mpwqe.mtt_no_align))
                goto err_free_wqe_info;
 
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        int err;
        int i;
 
-       rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
        err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
                                &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                rq->wqe.frag_info =
                        kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-                                    GFP_KERNEL,
-                                    mlx5e_get_node(c->priv, c->ix));
+                                    GFP_KERNEL, cpu_to_node(c->cpu));
                if (!rq->wqe.frag_info) {
                        err = -ENOMEM;
                        goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
        sq->channel   = c;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
        struct mlx5_core_dev *mdev = c->priv->mdev;
        int err;
 
-       param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-       param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
+       param->wq.buf_numa_node = cpu_to_node(c->cpu);
+       param->wq.db_numa_node  = cpu_to_node(c->cpu);
        param->eq_ix   = c->ix;
 
        err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
        mlx5e_free_cq(cq);
 }
 
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+       return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 {
        struct mlx5e_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
+       int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
        unsigned int irq;
        int err;
        int eqn;
 
-       c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+       c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
        if (!c)
                return -ENOMEM;
 
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->mdev     = priv->mdev;
        c->tstamp   = &priv->tstamp;
        c->ix       = ix;
+       c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
        for (tc = 0; tc < c->num_tc; tc++)
                mlx5e_activate_txqsq(&c->sq[tc]);
        mlx5e_activate_rq(&c->rq);
-       netif_set_xps_queue(c->netdev,
-               mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+       netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                                                     struct sk_buff *skb,
                                                     netdev_features_t features)
 {
+       unsigned int offset = 0;
        struct udphdr *udph;
        u8 proto;
        u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                proto = ip_hdr(skb)->protocol;
                break;
        case htons(ETH_P_IPV6):
-               proto = ipv6_hdr(skb)->nexthdr;
+               proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
                break;
        default:
                goto out;
index 60771865c99c9bf4402d042a760887c4497e0036..e7e7cef2bde402be23b191873a5790ed23fd7843 100644 (file)
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                        break;
                case MLX5_EVENT_TYPE_CQ_ERROR:
                        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                                       cqn, eqe->data.cq_err.syndrome);
                        mlx5_cq_event(dev, cqn, eqe->type);
                        break;
@@ -775,7 +775,7 @@ err1:
        return err;
 }
 
-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_GEN(dev, pg)) {
                err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
                if (err)
-                       return err;
+                       mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+                                     err);
        }
 #endif
 
        err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
        if (err)
-               return err;
+               mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+                             err);
 
-       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       if (err)
+               mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+                             err);
        mlx5_cmd_use_polling(dev);
 
        err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
        if (err)
-               mlx5_cmd_use_events(dev);
-
-       return err;
+               mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+                             err);
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
index 3c11d6e2160abeef5a893b7b81274a7ce315368c..14962969c5ba8c4462662eeb30ef10cbe1c27fa6 100644 (file)
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
index c70fd663a63301e7e89ef9ee00d37c7075fe1a0b..dfaad9ecb2b8f155c5cdf30451c572b2d10f1d37 100644 (file)
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 static void del_sw_flow_table(struct fs_node *node);
 static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
 /* Delete rule (destination) is special case that 
  * requires to lock the FTE for all the deletion process.
  */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
        return NULL;
 }
 
+static void del_sw_ns(struct fs_node *node)
+{
+       kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+       kfree(node);
+}
+
 static void del_hw_flow_table(struct fs_node *node)
 {
        struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
                return ERR_PTR(-ENOMEM);
 
        fs_prio->node.type = FS_TYPE_PRIO;
-       tree_init_node(&fs_prio->node, NULL, NULL);
+       tree_init_node(&fs_prio->node, NULL, del_sw_prio);
        tree_add_node(&fs_prio->node, &ns->node);
        fs_prio->num_levels = num_levels;
        fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
                return ERR_PTR(-ENOMEM);
 
        fs_init_namespace(ns);
-       tree_init_node(&ns->node, NULL, NULL);
+       tree_init_node(&ns->node, NULL, del_sw_ns);
        tree_add_node(&ns->node, &prio->node);
        list_add_tail(&ns->node.list, &prio->node.children);
 
index 1a0e797ad001ad672c954c228350ff9bcdea125b..21d29f7936f6c5d1e26c6e0d3f10644fd0f096c8 100644 (file)
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
        u32 fw;
        int i;
 
-       /* If the syndrom is 0, the device is OK and no need to print buffer */
+       /* If the syndrome is 0, the device is OK and no need to print buffer */
        if (!ioread8(&h->synd))
                return;
 
index d2a66dc4adc6d2933cfbc60c28cd49c67716a010..8812d7208e8f3522500b3f3e971b4a7341b22c8f 100644 (file)
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params)
 {
        /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-       mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+       mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
 
        /* RQ size in ipoib by default is 512 */
        params->log_rq_size = is_kdump_kernel() ?
index 5f323442cc5ac009d5006438d93183e96b85d0d9..8a89c7e8cd631f2e14cb7cbac99a8983964b7eda 100644 (file)
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
        struct mlx5_priv *priv = &dev->priv;
        struct mlx5_eq_table *table = &priv->eq_table;
-       struct irq_affinity irqdesc = {
-               .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
-       };
        int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
        int nvec;
 
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
        if (!priv->irq_info)
                goto err_free_msix;
 
-       nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
+       nvec = pci_alloc_irq_vectors(dev->pdev,
                        MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-                       PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
-                       &irqdesc);
+                       PCI_IRQ_MSIX);
        if (nvec < 0)
                return nvec;
 
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
        return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       priv->irq_info[i].mask);
+
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+       return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       irq_set_affinity_hint(irq, NULL);
+       free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+               err = mlx5_irq_set_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn)
 {
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_stop_eqs;
        }
 
+       err = mlx5_irq_set_affinity_hints(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+               goto err_affinity_hints;
+       }
+
        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
        mlx5_cleanup_fs(dev);
 
 err_fs:
+       mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
        free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
+       mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_put_uars_page(dev, priv->uar);
index db9e665ab10474f934131b5c2a8fa2173fa5feb6..889130edb71525ecd1f46e88a11b2d3fa0ef843f 100644 (file)
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
-       MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-       MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+       MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+       MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
        mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
        return err;
 }
index e651e4c02867740d35c07bfcf485860f26ad6409..d3c33e9eea7292412974802c4c38ded8898ed55c 100644 (file)
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
        return ret_entry;
 }
 
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
                                   u32 rate, u16 index)
 {
-       u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
 
-       MLX5_SET(set_rate_limit_in, in, opcode,
-                MLX5_CMD_OP_SET_RATE_LIMIT);
-       MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
-       MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+       MLX5_SET(set_pp_rate_limit_in, in, opcode,
+                MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
                entry->refcount++;
        } else {
                /* new rate limit */
-               err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+               err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
                if (err) {
                        mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
                                      rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
        entry->refcount--;
        if (!entry->refcount) {
                /* need to remove rate */
-               mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+               mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
                entry->rate = 0;
        }
 
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
        /* Clear all configured rates */
        for (i = 0; i < table->max_size; i++)
                if (table->rl_entry[i].rate)
-                       mlx5_set_rate_limit_cmd(dev, 0,
-                                               table->rl_entry[i].index);
+                       mlx5_set_pp_rate_limit_cmd(dev, 0,
+                                                  table->rl_entry[i].index);
 
        kfree(dev->priv.rl_table.rl_entry);
 }
index 07a9ba6cfc70a11f7b4c05c73c1b32a704b7e6ba..2f74953e4561511e23d8fe3219db89104e3dd9e3 100644 (file)
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
        struct mlx5e_vxlan *vxlan;
 
-       spin_lock(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-       spin_unlock(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
 
        return vxlan;
 }
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
        struct mlx5e_vxlan *vxlan;
        int err;
 
-       if (mlx5e_vxlan_lookup_port(priv, port))
+       mutex_lock(&priv->state_lock);
+       vxlan = mlx5e_vxlan_lookup_port(priv, port);
+       if (vxlan) {
+               atomic_inc(&vxlan->refcount);
                goto free_work;
+       }
 
        if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
                goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
                goto err_delete_port;
 
        vxlan->udp_port = port;
+       atomic_set(&vxlan->refcount, 1);
 
-       spin_lock_irq(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-       spin_unlock_irq(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
        if (err)
                goto err_free;
 
@@ -113,35 +118,39 @@ err_free:
 err_delete_port:
        mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 free_work:
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
 {
+       struct mlx5e_vxlan_work *vxlan_work =
+               container_of(work, struct mlx5e_vxlan_work, work);
+       struct mlx5e_priv *priv         = vxlan_work->priv;
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+       u16 port = vxlan_work->port;
        struct mlx5e_vxlan *vxlan;
+       bool remove = false;
 
-       spin_lock_irq(&vxlan_db->lock);
-       vxlan = radix_tree_delete(&vxlan_db->tree, port);
-       spin_unlock_irq(&vxlan_db->lock);
-
+       mutex_lock(&priv->state_lock);
+       spin_lock_bh(&vxlan_db->lock);
+       vxlan = radix_tree_lookup(&vxlan_db->tree, port);
        if (!vxlan)
-               return;
-
-       mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
-       kfree(vxlan);
-}
+               goto out_unlock;
 
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-       struct mlx5e_vxlan_work *vxlan_work =
-               container_of(work, struct mlx5e_vxlan_work, work);
-       struct mlx5e_priv *priv = vxlan_work->priv;
-       u16 port = vxlan_work->port;
+       if (atomic_dec_and_test(&vxlan->refcount)) {
+               radix_tree_delete(&vxlan_db->tree, port);
+               remove = true;
+       }
 
-       __mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+       spin_unlock_bh(&vxlan_db->lock);
 
+       if (remove) {
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
+       }
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
        struct mlx5e_vxlan *vxlan;
        unsigned int port = 0;
 
-       spin_lock_irq(&vxlan_db->lock);
+       /* Lockless: the workqueue is disabled, so we are the only radix-tree consumer */
        while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
                port = vxlan->udp_port;
-               spin_unlock_irq(&vxlan_db->lock);
-               __mlx5e_vxlan_core_del_port(priv, (u16)port);
-               spin_lock_irq(&vxlan_db->lock);
+               radix_tree_delete(&vxlan_db->tree, port);
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
        }
-       spin_unlock_irq(&vxlan_db->lock);
 }
index 5def12c048e38992e7edd9f870233e369ef4580e..5ef6ae7d568abcd1410bc403b526628a634799cb 100644 (file)
@@ -36,6 +36,7 @@
 #include "en.h"
 
 struct mlx5e_vxlan {
+       atomic_t refcount;
        u16 udp_port;
 };
 
index 2d0897b7d86035286666e38ad4e41ab63fb4746b..9bd8d28de1522906b92021a8cf2c476b79c35a56 100644 (file)
@@ -4300,6 +4300,7 @@ static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       u16 vid = 1;
        int err;
 
        err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true);
@@ -4312,8 +4313,19 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
                                     true, false);
        if (err)
                goto err_port_vlan_set;
+
+       for (; vid <= VLAN_N_VID - 1; vid++) {
+               err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
+                                                    vid, false);
+               if (err)
+                       goto err_vid_learning_set;
+       }
+
        return 0;
 
+err_vid_learning_set:
+       for (vid--; vid >= 1; vid--)
+               mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
 err_port_vlan_set:
        mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
 err_port_stp_set:
@@ -4323,6 +4335,12 @@ err_port_stp_set:
 
 static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+       u16 vid;
+
+       for (vid = VLAN_N_VID - 1; vid >= 1; vid--)
+               mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
+                                              vid, true);
+
        mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
                               false, false);
        mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
index 72ef4f8025f00ff8810c2955b25b7f3baec49be1..be657b8533f04922a61a2f3a4b1aeddf3137cdf5 100644 (file)
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
        rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-                                   const struct mlxsw_sp_rif *rif)
-{
-       char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-       mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-                            rif->rif_index, rif->addr);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *rif)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-       mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-                                rif_list_node)
+                                rif_list_node) {
+               mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+       }
 }
 
 enum mlxsw_sp_nexthop_type {
index e379b78e86efa7c02dca2bc95afc1f79afc7800a..13190aa09faf748c16e1f00f7aee9097442aef85 100644 (file)
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
        return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
+static int
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+       int err;
+
+       nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
+       if (!nn->app_priv)
+               return -ENOMEM;
+
+       err = nfp_app_nic_vnic_alloc(app, nn, id);
+       if (err)
+               goto err_free_priv;
+
+       return 0;
+err_free_priv:
+       kfree(nn->app_priv);
+       return err;
+}
+
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
+       struct nfp_bpf_vnic *bv = nn->app_priv;
+
        if (nn->dp.bpf_offload_xdp)
                nfp_bpf_xdp_offload(app, nn, NULL);
+       WARN_ON(bv->tc_prog);
+       kfree(bv);
 }
 
 static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 {
        struct tc_cls_bpf_offload *cls_bpf = type_data;
        struct nfp_net *nn = cb_priv;
+       struct bpf_prog *oldprog;
+       struct nfp_bpf_vnic *bv;
+       int err;
 
        if (type != TC_SETUP_CLSBPF ||
            !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
            cls_bpf->common.protocol != htons(ETH_P_ALL) ||
            cls_bpf->common.chain_index)
                return -EOPNOTSUPP;
-       if (nn->dp.bpf_offload_xdp)
-               return -EBUSY;
 
        /* Only support TC direct action */
        if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
                return -EOPNOTSUPP;
        }
 
-       switch (cls_bpf->command) {
-       case TC_CLSBPF_REPLACE:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-       case TC_CLSBPF_ADD:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-       case TC_CLSBPF_DESTROY:
-               return nfp_net_bpf_offload(nn, NULL, true);
-       default:
+       if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
                return -EOPNOTSUPP;
+
+       bv = nn->app_priv;
+       oldprog = cls_bpf->oldprog;
+
+       /* Don't remove if oldprog doesn't match driver's state */
+       if (bv->tc_prog != oldprog) {
+               oldprog = NULL;
+               if (!cls_bpf->prog)
+                       return 0;
        }
+
+       err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+       if (err)
+               return err;
+
+       bv->tc_prog = cls_bpf->prog;
+       return 0;
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
 
        .extra_cap      = nfp_bpf_extra_cap,
 
-       .vnic_alloc     = nfp_app_nic_vnic_alloc,
+       .vnic_alloc     = nfp_bpf_vnic_alloc,
        .vnic_free      = nfp_bpf_vnic_free,
 
        .setup_tc       = nfp_bpf_setup_tc,
index 082a15f6dfb5b9ba806316bd4f272006c93749fb..57b6043177a3891c49096ab85906ee539b3d5ecb 100644 (file)
@@ -172,6 +172,14 @@ struct nfp_prog {
        struct list_head insns;
 };
 
+/**
+ * struct nfp_bpf_vnic - per-vNIC BPF priv structure
+ * @tc_prog:   currently loaded cls_bpf program
+ */
+struct nfp_bpf_vnic {
+       struct bpf_prog *tc_prog;
+};
+
 int nfp_bpf_jit(struct nfp_prog *prog);
 
 extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
index 18461fcb981501efd7015634999cb787041c01a7..53dbf1e163a85ea5bdfc571788c18ce0d6d0f7b3 100644 (file)
@@ -47,6 +47,7 @@
 #define MDIO_CLK_25_28                                               7
 
 #define MDIO_WAIT_TIMES                                           1000
+#define MDIO_STATUS_DELAY_TIME                                       1
 
 static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 {
@@ -65,7 +66,7 @@ static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
 
        if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
                               !(reg & (MDIO_START | MDIO_BUSY)),
-                              100, MDIO_WAIT_TIMES * 100))
+                              MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100))
                return -EIO;
 
        return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
@@ -88,8 +89,8 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
        writel(reg, adpt->base + EMAC_MDIO_CTRL);
 
        if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
-                              !(reg & (MDIO_START | MDIO_BUSY)), 100,
-                              MDIO_WAIT_TIMES * 100))
+                              !(reg & (MDIO_START | MDIO_BUSY)),
+                              MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100))
                return -EIO;
 
        return 0;
index 70c92b649b299a1a16195e144e3da77cff25855c..38c924bdd32e46f3586eac77d5a63c361993fd09 100644 (file)
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
                return ret;
        }
 
-       ret = emac_mac_up(adpt);
+       ret = adpt->phy.open(adpt);
        if (ret) {
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
                return ret;
        }
 
-       ret = adpt->phy.open(adpt);
+       ret = emac_mac_up(adpt);
        if (ret) {
-               emac_mac_down(adpt);
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
+               adpt->phy.close(adpt);
                return ret;
        }
 
index 2b962d349f5f415b0b38148a5d575a5720850fbe..009780df664b4a09ea3e88855b0e707ff0b24b10 100644 (file)
@@ -2308,32 +2308,9 @@ static int __maybe_unused ravb_resume(struct device *dev)
        struct ravb_private *priv = netdev_priv(ndev);
        int ret = 0;
 
-       if (priv->wol_enabled) {
-               /* Reduce the usecount of the clock to zero and then
-                * restore it to its original value. This is done to force
-                * the clock to be re-enabled which is a workaround
-                * for renesas-cpg-mssr driver which do not enable clocks
-                * when resuming from PSCI suspend/resume.
-                *
-                * Without this workaround the driver fails to communicate
-                * with the hardware if WoL was enabled when the system
-                * entered PSCI suspend. This is due to that if WoL is enabled
-                * we explicitly keep the clock from being turned off when
-                * suspending, but in PSCI sleep power is cut so the clock
-                * is disabled anyhow, the clock driver is not aware of this
-                * so the clock is not turned back on when resuming.
-                *
-                * TODO: once the renesas-cpg-mssr suspend/resume is working
-                *       this clock dance should be removed.
-                */
-               clk_disable(priv->clk);
-               clk_disable(priv->clk);
-               clk_enable(priv->clk);
-               clk_enable(priv->clk);
-
-               /* Set reset mode to rearm the WoL logic */
+       /* If WoL is enabled set reset mode to rearm the WoL logic */
+       if (priv->wol_enabled)
                ravb_write(ndev, CCC_OPC_RESET, CCC);
-       }
 
        /* All register have been reset to default values.
         * Restore all registers which where setup at probe time and
index db72d13cebb9e52c3e07ac9de09670b3f547af4b..75323000c3646bc781c12287367f8b455ada5a6a 100644 (file)
@@ -1892,6 +1892,16 @@ static int sh_eth_phy_init(struct net_device *ndev)
                return PTR_ERR(phydev);
        }
 
+       /* mask with MAC supported features */
+       if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) {
+               int err = phy_set_max_speed(phydev, SPEED_100);
+               if (err) {
+                       netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n");
+                       phy_disconnect(phydev);
+                       return err;
+               }
+       }
+
        phy_attached_info(phydev);
 
        return 0;
index e1e5ac0537606f2192d553c85795428b18fd615d..ce2ea2d491acac195eefe3f01f8ec9df08d3e77c 100644 (file)
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
        /* get timestamp value */
         u64(*get_timestamp) (void *desc, u32 ats);
        /* get rx timestamp status */
-       int (*get_rx_timestamp_status) (void *desc, u32 ats);
+       int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
        /* Display ring */
        void (*display_ring)(void *head, unsigned int size, bool rx);
        /* set MSS via context descriptor */
index 4b286e27c4ca5cdbbb7c457e31bef1b2e9e7bd94..7e089bf906b4f316034403f9a44fbfd191ee09eb 100644 (file)
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
        return ret;
 }
 
-static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
+                                                u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
        int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 
                        /* Check if timestamp is OK from context descriptor */
                        do {
-                               ret = dwmac4_rx_check_timestamp(desc);
+                               ret = dwmac4_rx_check_timestamp(next_desc);
                                if (ret < 0)
                                        goto exit;
                                i++;
index 7546b3664113a3d776fe19094df71b2adfb99e98..2a828a31281423082995bc332ec51a3f20989804 100644 (file)
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
+static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
+                                           u32 ats)
 {
        if (ats) {
                struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
index f817f8f365696d3388e73f85710d30dde43a7d41..db4cee57bb2465eb98fe38cb947624e779da4673 100644 (file)
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
+static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
 
index 721b616552611aa74ea077e744ec9a0c4836a48f..08c19ebd530674972ceb9ebcb41cd7af4b3fb58d 100644 (file)
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 {
        u32 value = readl(ioaddr + PTP_TCR);
        unsigned long data;
+       u32 reg_value;
 
        /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
         *      formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 
        data &= PTP_SSIR_SSINC_MASK;
 
+       reg_value = data;
        if (gmac4)
-               data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+               reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
 
-       writel(data, ioaddr + PTP_SSIR);
+       writel(reg_value, ioaddr + PTP_SSIR);
 
        return data;
 }
index d7250539d0bd0c61c92fc9460c9e1197bb57ac8f..337d53d12e94b3acfe745e48422b44d1939ad2c0 100644 (file)
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
                desc = np;
 
        /* Check if timestamp is available */
-       if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
+       if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
                ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
                netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
index 8483f03d5a4103d6c4da7d5d586a99e013f06107..1ab97d99b9bae9f9dde6227dacff606256e66c72 100644 (file)
@@ -1379,8 +1379,8 @@ static int rr_close(struct net_device *dev)
                            rrpriv->info_dma);
        rrpriv->info = NULL;
 
-       free_irq(pdev->irq, dev);
        spin_unlock_irqrestore(&rrpriv->lock, flags);
+       free_irq(pdev->irq, dev);
 
        return 0;
 }
index 5f93e6add56394f28f68da72ddb8fc724db14200..e911e4990b20e181aaf3711d1069f086c2a7a4d3 100644 (file)
@@ -239,14 +239,10 @@ static int at803x_resume(struct phy_device *phydev)
 {
        int value;
 
-       mutex_lock(&phydev->lock);
-
        value = phy_read(phydev, MII_BMCR);
        value &= ~(BMCR_PDOWN | BMCR_ISOLATE);
        phy_write(phydev, MII_BMCR, value);
 
-       mutex_unlock(&phydev->lock);
-
        return 0;
 }
 
index 4d02b27df0445e7c1ad6f0774e44a5b7013bb05e..82104edca393b9b6662a18ef8ea0bdd8d3bb057d 100644 (file)
@@ -637,6 +637,10 @@ static int m88e1510_config_aneg(struct phy_device *phydev)
        if (err < 0)
                goto error;
 
+       /* Do not touch the fiber page if we're in copper->sgmii mode */
+       if (phydev->interface == PHY_INTERFACE_MODE_SGMII)
+               return 0;
+
        /* Then the fiber link */
        err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE);
        if (err < 0)
@@ -875,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
 
        /* SGMII-to-Copper mode initialization */
        if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+               u32 pause;
+
                /* Select page 18 */
                err = marvell_set_page(phydev, 18);
                if (err < 0)
@@ -898,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
                err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
                if (err < 0)
                        return err;
+
+               /* There appears to be a bug in the 88e1512 when used in
+                * SGMII to copper mode, where the AN advertisement register
+                * clears the pause bits each time a negotiation occurs.
+                * This means we can never be truly sure what was advertised,
+                * so disable Pause support.
+                */
+               pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+               phydev->supported &= ~pause;
+               phydev->advertising &= ~pause;
        }
 
        return m88e1121_config_init(phydev);
@@ -2069,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
                .config_init = &m88e1145_config_init,
-               .config_aneg = &marvell_config_aneg,
+               .config_aneg = &m88e1101_config_aneg,
                .read_status = &genphy_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
                .config_intr = &marvell_config_intr,
index bfd3090fb055bac4c40924205036119da5b6ce61..07c6048200c6164ac77a649468063e2fedd404c6 100644 (file)
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
        }
 
        ret = xgene_enet_ecc_init(pdata);
-       if (ret)
+       if (ret) {
+               if (pdata->dev->of_node)
+                       clk_disable_unprepare(pdata->clk);
                return ret;
+       }
        xgene_gmac_reset(pdata);
 
        return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                return ret;
 
        mdio_bus = mdiobus_alloc();
-       if (!mdio_bus)
-               return -ENOMEM;
+       if (!mdio_bus) {
+               ret = -ENOMEM;
+               goto out_clk;
+       }
 
        mdio_bus->name = "APM X-Gene MDIO bus";
 
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                mdio_bus->phy_mask = ~0;
                ret = mdiobus_register(mdio_bus);
                if (ret)
-                       goto out;
+                       goto out_mdiobus;
 
                acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
                                    acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
        }
 
        if (ret)
-               goto out;
+               goto out_mdiobus;
 
        pdata->mdio_bus = mdio_bus;
        xgene_mdio_status = true;
 
        return 0;
 
-out:
+out_mdiobus:
        mdiobus_free(mdio_bus);
 
+out_clk:
+       if (dev->of_node)
+               clk_disable_unprepare(pdata->clk);
+
        return ret;
 }
 
index 2df7b62c1a36811e97087ae641a89d06641cef4e..54d00a1d2bef094877c80bccad220de7f4d97eba 100644 (file)
@@ -270,6 +270,7 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus,
 
                if (addr == mdiodev->addr) {
                        dev->of_node = child;
+                       dev->fwnode = of_fwnode_handle(child);
                        return;
                }
        }
index 1ea69b7585d9bcb8098ecddfc33d0a3a46704843..842eb871a6e38df0c22cc43b7ee02f0f137f3a05 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/netdevice.h>
+#include <linux/bitfield.h>
 
 static int meson_gxl_config_init(struct phy_device *phydev)
 {
@@ -50,6 +51,77 @@ static int meson_gxl_config_init(struct phy_device *phydev)
        return 0;
 }
 
+/* This function is provided to cope with the possible failures of this phy
+ * during the aneg process. When aneg fails, the PHY reports that aneg is done
+ * but the value found in MII_LPA is wrong:
+ *  - Early failures: MII_LPA is just 0x0001. If MII_EXPANSION reports that
+ *    the link partner (LP) supports aneg but the LP never acked our base
+ *    code word, it is likely that we never sent it to begin with.
+ *  - Late failures: MII_LPA is filled with a value which seems to make sense
+ *    but it actually is not what the LP is advertising. It seems that we
+ *    can detect this using a magic bit in the WOL bank (reg 12 - bit 12).
+ *    If this particular bit is not set when aneg is reported as being done,
+ *    it means MII_LPA is likely to be wrong.
+ *
+ * In both cases, forcing a restart of the aneg process solves the problem.
+ * When this failure happens, the first retry is usually successful but,
+ * in some cases, it may take up to 6 retries to get a decent result.
+ */
+static int meson_gxl_read_status(struct phy_device *phydev)
+{
+       int ret, wol, lpa, exp;
+
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               ret = genphy_aneg_done(phydev);
+               if (ret < 0)
+                       return ret;
+               else if (!ret)
+                       goto read_status_continue;
+
+               /* Need to access WOL bank, make sure the access is open */
+               ret = phy_write(phydev, 0x14, 0x0000);
+               if (ret)
+                       return ret;
+               ret = phy_write(phydev, 0x14, 0x0400);
+               if (ret)
+                       return ret;
+               ret = phy_write(phydev, 0x14, 0x0000);
+               if (ret)
+                       return ret;
+               ret = phy_write(phydev, 0x14, 0x0400);
+               if (ret)
+                       return ret;
+
+               /* Request LPI_STATUS WOL register */
+               ret = phy_write(phydev, 0x14, 0x8D80);
+               if (ret)
+                       return ret;
+
+               /* Read LPI_STATUS value */
+               wol = phy_read(phydev, 0x15);
+               if (wol < 0)
+                       return wol;
+
+               lpa = phy_read(phydev, MII_LPA);
+               if (lpa < 0)
+                       return lpa;
+
+               exp = phy_read(phydev, MII_EXPANSION);
+               if (exp < 0)
+                       return exp;
+
+               if (!(wol & BIT(12)) ||
+                   ((exp & EXPANSION_NWAY) && !(lpa & LPA_LPACK))) {
+                       /* Looks like aneg failed after all */
+                       phydev_dbg(phydev, "LPA corruption - aneg restart\n");
+                       return genphy_restart_aneg(phydev);
+               }
+       }
+
+read_status_continue:
+       return genphy_read_status(phydev);
+}
+
 static struct phy_driver meson_gxl_phy[] = {
        {
                .phy_id         = 0x01814400,
@@ -60,7 +132,7 @@ static struct phy_driver meson_gxl_phy[] = {
                .config_init    = meson_gxl_config_init,
                .config_aneg    = genphy_config_aneg,
                .aneg_done      = genphy_aneg_done,
-               .read_status    = genphy_read_status,
+               .read_status    = meson_gxl_read_status,
                .suspend        = genphy_suspend,
                .resume         = genphy_resume,
        },
index 2b1e67bc1e736ceb33f7afa8462f5a4858b522df..ed10d1fc8f59188b95e090f085ced18421ad23c2 100644 (file)
@@ -828,7 +828,6 @@ EXPORT_SYMBOL(phy_stop);
  */
 void phy_start(struct phy_device *phydev)
 {
-       bool do_resume = false;
        int err = 0;
 
        mutex_lock(&phydev->lock);
@@ -841,6 +840,9 @@ void phy_start(struct phy_device *phydev)
                phydev->state = PHY_UP;
                break;
        case PHY_HALTED:
+               /* if phy was suspended, bring the physical link up again */
+               phy_resume(phydev);
+
                /* make sure interrupts are re-enabled for the PHY */
                if (phydev->irq != PHY_POLL) {
                        err = phy_enable_interrupts(phydev);
@@ -849,17 +851,12 @@ void phy_start(struct phy_device *phydev)
                }
 
                phydev->state = PHY_RESUMING;
-               do_resume = true;
                break;
        default:
                break;
        }
        mutex_unlock(&phydev->lock);
 
-       /* if phy was suspended, bring the physical link up again */
-       if (do_resume)
-               phy_resume(phydev);
-
        phy_trigger_machine(phydev, true);
 }
 EXPORT_SYMBOL(phy_start);
index 67f25ac29025c53903cc724fac62efdd94828510..b15b31ca26182719cca6f764e19335483e11930e 100644 (file)
@@ -135,7 +135,9 @@ static int mdio_bus_phy_resume(struct device *dev)
        if (!mdio_bus_phy_may_suspend(phydev))
                goto no_resume;
 
+       mutex_lock(&phydev->lock);
        ret = phy_resume(phydev);
+       mutex_unlock(&phydev->lock);
        if (ret < 0)
                return ret;
 
@@ -1026,7 +1028,9 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
        if (err)
                goto error;
 
+       mutex_lock(&phydev->lock);
        phy_resume(phydev);
+       mutex_unlock(&phydev->lock);
        phy_led_triggers_register(phydev);
 
        return err;
@@ -1157,6 +1161,8 @@ int phy_resume(struct phy_device *phydev)
        struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
        int ret = 0;
 
+       WARN_ON(!mutex_is_locked(&phydev->lock));
+
        if (phydev->drv && phydrv->resume)
                ret = phydrv->resume(phydev);
 
@@ -1639,13 +1645,9 @@ int genphy_resume(struct phy_device *phydev)
 {
        int value;
 
-       mutex_lock(&phydev->lock);
-
        value = phy_read(phydev, MII_BMCR);
        phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
 
-       mutex_unlock(&phydev->lock);
-
        return 0;
 }
 EXPORT_SYMBOL(genphy_resume);
index 304ec6555cd88b3b058b944f6120df3294adb1ca..3000ddd1c7e2e481bb961deb86099b5c2ea11371 100644 (file)
@@ -1204,12 +1204,14 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x1199, 0x9079, 10)},   /* Sierra Wireless EM74xx */
        {QMI_FIXED_INTF(0x1199, 0x907b, 8)},    /* Sierra Wireless EM74xx */
        {QMI_FIXED_INTF(0x1199, 0x907b, 10)},   /* Sierra Wireless EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x9091, 8)},    /* Sierra Wireless EM7565 */
        {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},    /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
        {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},    /* Alcatel L800MA */
        {QMI_FIXED_INTF(0x2357, 0x0201, 4)},    /* TP-LINK HSUPA Modem MA180 */
        {QMI_FIXED_INTF(0x2357, 0x9000, 4)},    /* TP-LINK MA260 */
        {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
        {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},    /* Telit ME910 */
+       {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)},    /* Telit ME910 dual modem */
        {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},    /* Telit LE920 */
        {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */
        {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)},    /* Telewell TW-3G HSPA+ */
index 19b9cc51079e75346af766c91786d66eaa92c3f2..31f4b7911ef84c85789011332e37c5314099d82c 100644 (file)
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                }
 
                ndst = &rt->dst;
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
                err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                goto out_unlock;
                }
 
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
 
                max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
                                           VXLAN_HEADROOM);
+               if (max_mtu < ETH_MIN_MTU)
+                       max_mtu = ETH_MIN_MTU;
+
+               if (!changelink && !conf->mtu)
+                       dev->mtu = max_mtu;
        }
 
        if (dev->mtu > max_mtu)
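
The update_pmtu() calls added above clamp the inner path MTU to the underlay MTU minus the encapsulation overhead. A minimal sketch of that arithmetic follows; the 20/40 + 8 + 8 + 14 byte figures are the usual outer IP, UDP, VXLAN and Ethernet header sizes and are an assumption here, not the driver's definitions of VXLAN_HEADROOM/VXLAN6_HEADROOM.

#include <stdio.h>

/* assumed encapsulation overhead; see note above */
#define VXLAN_HEADROOM_GUESS	(20 + 8 + 8 + 14)	/* IPv4 underlay: 50 */
#define VXLAN6_HEADROOM_GUESS	(40 + 8 + 8 + 14)	/* IPv6 underlay: 70 */

int main(void)
{
	int underlay_mtu = 1500;	/* dst_mtu(ndst) of the outer route */

	printf("inner PMTU over IPv4 underlay: %d\n",
	       underlay_mtu - VXLAN_HEADROOM_GUESS);	/* 1450 */
	printf("inner PMTU over IPv6 underlay: %d\n",
	       underlay_mtu - VXLAN6_HEADROOM_GUESS);	/* 1430 */
	return 0;
}
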
index 10b075a46b266218c53d1e5674c1789e1e0f3d80..e8189c07b41f6b450f135ef703ec4e01568e311d 100644 (file)
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
        hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
        hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
                                         IEEE80211_STYPE_NULLFUNC |
+                                        IEEE80211_FCTL_TODS |
                                         (ps ? IEEE80211_FCTL_PM : 0));
        hdr->duration_id = cpu_to_le16(0);
        memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
                if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
                        continue;
 
-               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
                if (!skb) {
                        res = -ENOMEM;
                        goto out_err;
index e949e3302af4743472ac26c68e9f4d54a27bb11a..c586bcdb5190b1c9f6447cb9a380568a03840988 100644 (file)
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
        return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-                       struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+                       struct log_group *log)
 {
        return arena_read_bytes(arena,
-                       arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-                       2 * LOG_ENT_SIZE, 0);
+                       arena->logoff + (lane * LOG_GRP_SIZE), log,
+                       LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
        debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
        debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
        debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+       debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+       debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
        }
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+       return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+       int idx0 = a->log_index[0];
+       int idx1 = a->log_index[1];
        int old;
 
        /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
         * the next time, the following logic works out to put this
         * (next) entry into [1]
         */
-       if (ent[0].seq == 0) {
-               ent[0].seq = cpu_to_le32(1);
+       if (log_seq(log, idx0) == 0) {
+               log->ent[idx0].seq = cpu_to_le32(1);
                return 0;
        }
 
-       if (ent[0].seq == ent[1].seq)
+       if (log_seq(log, idx0) == log_seq(log, idx1))
                return -EINVAL;
-       if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+       if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
                return -EINVAL;
 
-       if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-               if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+       if (log_seq(log, idx0) < log_seq(log, idx1)) {
+               if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
                        old = 0;
                else
                        old = 1;
        } else {
-               if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+               if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
                        old = 1;
                else
                        old = 0;
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 {
        int ret;
        int old_ent, ret_ent;
-       struct log_entry log[2];
+       struct log_group log;
 
-       ret = btt_log_read_pair(arena, lane, log);
+       ret = btt_log_group_read(arena, lane, &log);
        if (ret)
                return -EIO;
 
-       old_ent = btt_log_get_old(log);
+       old_ent = btt_log_get_old(arena, &log);
        if (old_ent < 0 || old_ent > 1) {
                dev_err(to_dev(arena),
                                "log corruption (%d): lane %d seq [%d, %d]\n",
-                       old_ent, lane, log[0].seq, log[1].seq);
+                               old_ent, lane, log.ent[arena->log_index[0]].seq,
+                               log.ent[arena->log_index[1]].seq);
                /* TODO set error state? */
                return -EIO;
        }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
        ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
        if (ent != NULL)
-               memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+               memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
        return ret_ent;
 }
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
                        u32 sub, struct log_entry *ent, unsigned long flags)
 {
        int ret;
-       /*
-        * Ignore the padding in log_entry for calculating log_half.
-        * The entry is 'committed' when we write the sequence number,
-        * and we want to ensure that that is the last thing written.
-        * We don't bother writing the padding as that would be extra
-        * media wear and write amplification
-        */
-       unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-       u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+       u32 group_slot = arena->log_index[sub];
+       unsigned int log_half = LOG_ENT_SIZE / 2;
        void *src = ent;
+       u64 ns_off;
 
+       ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+               (group_slot * LOG_ENT_SIZE);
        /* split the 16B write into atomic, durable halves */
        ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
        if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
 {
        size_t logsize = arena->info2off - arena->logoff;
        size_t chunk_size = SZ_4K, offset = 0;
-       struct log_entry log;
+       struct log_entry ent;
        void *zerobuf;
        int ret;
        u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
        }
 
        for (i = 0; i < arena->nfree; i++) {
-               log.lba = cpu_to_le32(i);
-               log.old_map = cpu_to_le32(arena->external_nlba + i);
-               log.new_map = cpu_to_le32(arena->external_nlba + i);
-               log.seq = cpu_to_le32(LOG_SEQ_INIT);
-               ret = __btt_log_write(arena, i, 0, &log, 0);
+               ent.lba = cpu_to_le32(i);
+               ent.old_map = cpu_to_le32(arena->external_nlba + i);
+               ent.new_map = cpu_to_le32(arena->external_nlba + i);
+               ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+               ret = __btt_log_write(arena, i, 0, &ent, 0);
                if (ret)
                        goto free;
        }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
        return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+       return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+               && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+       bool idx_set = false, initial_state = true;
+       int ret, log_index[2] = {-1, -1};
+       u32 i, j, next_idx = 0;
+       struct log_group log;
+       u32 pad_count = 0;
+
+       for (i = 0; i < arena->nfree; i++) {
+               ret = btt_log_group_read(arena, i, &log);
+               if (ret < 0)
+                       return ret;
+
+               for (j = 0; j < 4; j++) {
+                       if (!idx_set) {
+                               if (ent_is_padding(&log.ent[j])) {
+                                       pad_count++;
+                                       continue;
+                               } else {
+                                       /* Skip if index has been recorded */
+                                       if ((next_idx == 1) &&
+                                               (j == log_index[0]))
+                                               continue;
+                                       /* valid entry, record index */
+                                       log_index[next_idx] = j;
+                                       next_idx++;
+                               }
+                               if (next_idx == 2) {
+                                       /* two valid entries found */
+                                       idx_set = true;
+                               } else if (next_idx > 2) {
+                                       /* too many valid indices */
+                                       return -ENXIO;
+                               }
+                       } else {
+                               /*
+                                * once the indices have been set, just verify
+                                * that all subsequent log groups are either in
+                                * their initial state or follow the same
+                                * indices.
+                                */
+                               if (j == log_index[0]) {
+                                       /* entry must be 'valid' */
+                                       if (ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               } else if (j == log_index[1]) {
+                                       ;
+                                       /*
+                                        * log_index[1] can be padding if the
+                                        * lane never got used and it is still
+                                        * in the initial state (three 'padding'
+                                        * entries)
+                                        */
+                               } else {
+                                       /* entry must be invalid (padding) */
+                                       if (!ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               }
+                       }
+               }
+               /*
+                * If any of the log_groups has more than one valid,
+                * non-padding entry, then we are no longer in the
+                * initial_state.
+                */
+               if (pad_count < 3)
+                       initial_state = false;
+               pad_count = 0;
+       }
+
+       if (!initial_state && !idx_set)
+               return -ENXIO;
+
+       /*
+        * If all the entries in the log were in the initial state,
+        * assume new padding scheme
+        */
+       if (initial_state)
+               log_index[1] = 1;
+
+       /*
+        * Only allow the known permutations of log/padding indices,
+        * i.e. (0, 1), and (0, 2)
+        */
+       if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+               ; /* known index possibilities */
+       else {
+               dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+               return -ENXIO;
+       }
+
+       arena->log_index[0] = log_index[0];
+       arena->log_index[1] = log_index[1];
+       dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+       dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+       return 0;
+}
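
A stripped-down, user-space model of the slot classification performed by log_set_indices() is sketched below. It only covers the basic case of one fully written log group and ignores the initial-state handling and per-lane verification done above; the helper names are illustrative.

#include <stdio.h>
#include <stdint.h>

struct ent { uint32_t lba, old_map, new_map, seq; };
struct grp { struct ent ent[4]; };

/* a padding slot is expected to be all zeroes */
static int is_padding(const struct ent *e)
{
	return !e->lba && !e->old_map && !e->new_map && !e->seq;
}

/* return 1 and fill idx[] if exactly two of the four slots hold entries */
static int deduce_indices(const struct grp *g, int idx[2])
{
	int n = 0;

	for (int j = 0; j < 4; j++) {
		if (is_padding(&g->ent[j]))
			continue;
		if (n < 2)
			idx[n] = j;
		n++;
	}
	return n == 2;
}

int main(void)
{
	/* old padding scheme: entries in slots 0 and 2 */
	struct grp old_fmt = { .ent = { [0] = { 1, 2, 3, 4 }, [2] = { 1, 2, 3, 5 } } };
	/* new padding scheme: entries in slots 0 and 1 */
	struct grp new_fmt = { .ent = { [0] = { 1, 2, 3, 4 }, [1] = { 1, 2, 3, 5 } } };
	int idx[2];

	if (deduce_indices(&old_fmt, idx))
		printf("old format: (%d, %d)\n", idx[0], idx[1]);	/* (0, 2) */
	if (deduce_indices(&new_fmt, idx))
		printf("new format: (%d, %d)\n", idx[0], idx[1]);	/* (0, 1) */
	return 0;
}
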
+
 static int btt_rtt_init(struct arena_info *arena)
 {
        arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        available -= 2 * BTT_PG_SIZE;
 
        /* The log takes a fixed amount of space based on nfree */
-       logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-                               BTT_PG_SIZE);
+       logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
        available -= logsize;
 
        /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        arena->mapoff = arena->dataoff + datasize;
        arena->logoff = arena->mapoff + mapsize;
        arena->info2off = arena->logoff + logsize;
+
+       /* Default log indices are (0,1) */
+       arena->log_index[0] = 0;
+       arena->log_index[1] = 1;
        return arena;
 }
 
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
                arena->external_lba_start = cur_nlba;
                parse_arena_meta(arena, super, cur_off);
 
+               ret = log_set_indices(arena);
+               if (ret) {
+                       dev_err(to_dev(arena),
+                               "Unable to deduce log/padding indices\n");
+                       goto out;
+               }
+
                mutex_init(&arena->err_lock);
                ret = btt_freelist_init(arena);
                if (ret)
index 578c2057524d396fbf7c2eee88804b58c1f17cfe..db3cb6d4d0d495df8978494ff6619e2923478d32 100644 (file)
@@ -27,6 +27,7 @@
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)     /* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)    /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
        INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
 struct log_entry {
        __le32 lba;
        __le32 old_map;
        __le32 new_map;
        __le32 seq;
-       __le64 padding[2];
+};
+
+struct log_group {
+       struct log_entry ent[4];
 };
 
 struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
  * @list:              List head for list of arenas
  * @debugfs_dir:       Debugfs dentry
  * @flags:             Arena flags - may signify error states.
+ * @err_lock:          Mutex for synchronizing error clearing.
+ * @log_index:         Indices of the valid log entries in a log_group
  *
  * arena_info is a per-arena handle. Once an arena is narrowed down for an
  * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
        /* Arena flags */
        u32 flags;
        struct mutex err_lock;
+       int log_index[2];
 };
 
 /**
@@ -176,6 +220,7 @@ struct arena_info {
  * @init_lock:         Mutex used for the BTT initialization
  * @init_state:                Flag describing the initialization state for the BTT
  * @num_arenas:                Number of arenas in the BTT instance
+ * @phys_bb:           Pointer to the namespace's badblocks structure
  */
 struct btt {
        struct gendisk *btt_disk;
index 65cc171c721de8774baf05f9650f3bbacf511eec..2adada1a58551776186d6f6928a437d462734a48 100644 (file)
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
        u64 checksum, offset;
-       unsigned long align;
        enum nd_pfn_mode mode;
        struct nd_namespace_io *nsio;
+       unsigned long align, start_pad;
        struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
        struct nd_namespace_common *ndns = nd_pfn->ndns;
        const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 
        align = le32_to_cpu(pfn_sb->align);
        offset = le64_to_cpu(pfn_sb->dataoff);
+       start_pad = le32_to_cpu(pfn_sb->start_pad);
        if (align == 0)
                align = 1UL << ilog2(offset);
        mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                return -EBUSY;
        }
 
-       if ((align && !IS_ALIGNED(offset, align))
+       if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
                        || !IS_ALIGNED(offset, PAGE_SIZE)) {
                dev_err(&nd_pfn->dev,
                                "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
        return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+       return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+                       ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
        u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
        start = nsio->res.start;
        size = PHYS_SECTION_ALIGN_UP(start + size) - start;
        if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-                               IORES_DESC_NONE) == REGION_MIXED) {
+                               IORES_DESC_NONE) == REGION_MIXED
+                       || !IS_ALIGNED(start + resource_size(&nsio->res),
+                               nd_pfn->align)) {
                size = resource_size(&nsio->res);
-               end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+               end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+                               start + size);
        }
 
        if (start_pad + end_trunc)
-               dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+               dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
                                dev_name(&ndns->dev), start_pad + end_trunc);
 
        /*
index f837d666cbd499c8e33a1514f55344a1796005a1..1e46e60b8f1080e339ebe81c1710dabb23afef75 100644 (file)
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       queue->limits.discard_alignment = size;
+       queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
 
        blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
-       if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+       if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+           is_power_of_2(ctrl->max_hw_sectors))
                blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
        blk_queue_virt_boundary(q, ctrl->page_size - 1);
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
-       nvme_setup_streams_ns(ctrl, ns);
 
        id = nvme_identify_ns(ctrl, nsid);
        if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        if (nvme_init_ns_head(ns, nsid, id, &new))
                goto out_free_id;
+       nvme_setup_streams_ns(ctrl, ns);
        
 #ifdef CONFIG_NVME_MULTIPATH
        /*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                return;
 
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-               if (blk_get_integrity(ns->disk))
-                       blk_integrity_unregister(ns->disk);
                nvme_mpath_remove_disk_links(ns);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                        nvme_nvm_unregister_sysfs(ns);
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
+               if (blk_get_integrity(ns->disk))
+                       blk_integrity_unregister(ns->disk);
        }
 
        mutex_lock(&ns->ctrl->subsys->lock);
index 0a8af4daef8903f8ba983d345f1044498c57a975..794e66e4aa20115f4dc3a6b5fc12f706b2040bf4 100644 (file)
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
                /* initiate nvme ctrl ref counting teardown */
                nvme_uninit_ctrl(&ctrl->ctrl);
-               nvme_put_ctrl(&ctrl->ctrl);
 
                /* Remove core ctrl ref. */
                nvme_put_ctrl(&ctrl->ctrl);
index 98258583abb0b40529056767c91401296e0013d4..3481e69738b5f94d18cc393f77934fac7cfc7fdc 100644 (file)
@@ -81,6 +81,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
         * can be looked up later */
        of_node_get(child);
        phy->mdio.dev.of_node = child;
+       phy->mdio.dev.fwnode = of_fwnode_handle(child);
 
        /* All data is now stored in the phy struct;
         * register it */
@@ -111,6 +112,7 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
         */
        of_node_get(child);
        mdiodev->dev.of_node = child;
+       mdiodev->dev.fwnode = of_fwnode_handle(child);
 
        /* All data is now stored in the mdiodev struct; register it. */
        rc = mdio_device_register(mdiodev);
@@ -206,6 +208,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
        mdio->phy_mask = ~0;
 
        mdio->dev.of_node = np;
+       mdio->dev.fwnode = of_fwnode_handle(np);
 
        /* Get bus level PHY reset GPIO details */
        mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY;
index a25fed52f7e94de4bd3dd5cb8b0922e1df8e81bf..41b740aed3a346e4bbc610959281649447f83bd4 100644 (file)
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
        iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card, which both lack external connectors and are
+ * therefore useless. Worse, the AUX port occupies ttyS0 and as such makes
+ * those machines the only PARISC machines on which we can't use ttyS0 as
+ * the boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1292)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+       quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1291)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+       quirk_diva_aux_disable);
index 12796eccb2befd9170b1ba8119dc600ef356c7e8..52ab3cb0a0bfe065d8209201cbf03a7ef15cdfa4 100644 (file)
@@ -1128,12 +1128,12 @@ static int rcar_pcie_probe(struct platform_device *pdev)
        err = rcar_pcie_get_resources(pcie);
        if (err < 0) {
                dev_err(dev, "failed to request resources: %d\n", err);
-               goto err_free_bridge;
+               goto err_free_resource_list;
        }
 
        err = rcar_pcie_parse_map_dma_ranges(pcie, dev->of_node);
        if (err)
-               goto err_free_bridge;
+               goto err_free_resource_list;
 
        pm_runtime_enable(dev);
        err = pm_runtime_get_sync(dev);
@@ -1176,9 +1176,9 @@ err_pm_put:
 err_pm_disable:
        pm_runtime_disable(dev);
 
-err_free_bridge:
-       pci_free_host_bridge(bridge);
+err_free_resource_list:
        pci_free_resource_list(&pcie->resources);
+       pci_free_host_bridge(bridge);
 
        return err;
 }
index 7f47bb72bf301cd62cdcaa48e2e560e2df06e3a3..14fd865a512096393149fd63a3707305648f276f 100644 (file)
@@ -999,7 +999,7 @@ static int pci_pm_thaw_noirq(struct device *dev)
         * the subsequent "thaw" callbacks for the device.
         */
        if (dev_pm_smart_suspend_and_suspended(dev)) {
-               dev->power.direct_complete = true;
+               dev_pm_skip_next_resume_phases(dev);
                return 0;
        }
 
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
        if (pci_has_legacy_pm_support(pci_dev))
                return pci_legacy_resume_early(dev);
 
-       pci_update_current_state(pci_dev, PCI_D0);
+       /*
+        * pci_restore_state() requires the device to be in D0 (because of MSI
+        * restoration among other things), so force it into D0 in case the
+        * driver's "freeze" callbacks put it into a low-power state directly.
+        */
+       pci_set_power_state(pci_dev, PCI_D0);
        pci_restore_state(pci_dev);
 
        if (drv && drv->pm && drv->pm->thaw_noirq)
index bdedb6325c72a5fa0966377d53678722d18f07eb..4471fd94e1fe1f48b953360ad76e638e88f1a7ff 100644 (file)
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                        clear_bit(i, chip->irq.valid_mask);
        }
 
+       /*
+        * The same set of machines in chv_no_valid_mask[] have incorrectly
+        * configured GPIOs that generate spurious interrupts so we use
+        * this same list to apply another quirk for them.
+        *
+        * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+        */
+       if (!need_valid_mask) {
+               /*
+                * Mask all interrupts the community is able to generate
+                * but leave the ones that can only generate GPEs unmasked.
+                */
+               chv_writel(GENMASK(31, pctrl->community->nirqs),
+                          pctrl->regs + CHV_INTMASK);
+       }
+
        /* Clear all interrupts */
        chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
index f3796164329efb9ea00aed180a8c1116b813c979..d4aeac3477f55086b69aafa0098ad2f3d617d508 100644 (file)
@@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event)
                return;
        }
        input_report_key(data->idev, KEY_RFKILL, 1);
+       input_sync(data->idev);
        input_report_key(data->idev, KEY_RFKILL, 0);
        input_sync(data->idev);
 }
index bf897b1832b188c24a92f33fdf74b33b25aeac43..cd4725e7e0b56d8981bc770ca80b562c874370f8 100644 (file)
@@ -37,6 +37,7 @@
 
 struct quirk_entry {
        u8 touchpad_led;
+       u8 kbd_led_levels_off_1;
 
        int needs_kbd_timeouts;
        /*
@@ -67,6 +68,10 @@ static struct quirk_entry quirk_dell_xps13_9333 = {
        .kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
 };
 
+static struct quirk_entry quirk_dell_latitude_e6410 = {
+       .kbd_led_levels_off_1 = 1,
+};
+
 static struct platform_driver platform_driver = {
        .driver = {
                .name = "dell-laptop",
@@ -269,6 +274,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
                },
                .driver_data = &quirk_dell_xps13_9333,
        },
+       {
+               .callback = dmi_matched,
+               .ident = "Dell Latitude E6410",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6410"),
+               },
+               .driver_data = &quirk_dell_latitude_e6410,
+       },
        { }
 };
 
@@ -1149,6 +1163,9 @@ static int kbd_get_info(struct kbd_info *info)
        units = (buffer->output[2] >> 8) & 0xFF;
        info->levels = (buffer->output[2] >> 16) & 0xFF;
 
+       if (quirks && quirks->kbd_led_levels_off_1 && info->levels)
+               info->levels--;
+
        if (units & BIT(0))
                info->seconds = (buffer->output[3] >> 0) & 0xFF;
        if (units & BIT(1))
index 39d2f451848332d8346201b9cc1cd08bb5e10427..fb25b20df316f39140a3af0f974e9f1fa30aabff 100644 (file)
@@ -639,6 +639,8 @@ static int dell_wmi_events_set_enabled(bool enable)
        int ret;
 
        buffer = kzalloc(sizeof(struct calling_interface_buffer), GFP_KERNEL);
+       if (!buffer)
+               return -ENOMEM;
        buffer->cmd_class = CLASS_INFO;
        buffer->cmd_select = SELECT_APP_REGISTRATION;
        buffer->input[0] = 0x10000;
index 15015a24f8ad750d2c107bf278442340cef801fd..badf42acbf95b8104d6167dfc811640b1ec311df 100644 (file)
@@ -565,9 +565,9 @@ enum qeth_cq {
 };
 
 struct qeth_ipato {
-       int enabled;
-       int invert4;
-       int invert6;
+       bool enabled;
+       bool invert4;
+       bool invert6;
        struct list_head entries;
 };
 
index 430e3214f7e26791af247d402734efb0ebb9cfc3..3614df68830f8f6a4abd756c52ca4e1e72e8e1d1 100644 (file)
@@ -1480,9 +1480,9 @@ static int qeth_setup_card(struct qeth_card *card)
        qeth_set_intial_options(card);
        /* IP address takeover */
        INIT_LIST_HEAD(&card->ipato.entries);
-       card->ipato.enabled = 0;
-       card->ipato.invert4 = 0;
-       card->ipato.invert6 = 0;
+       card->ipato.enabled = false;
+       card->ipato.invert4 = false;
+       card->ipato.invert6 = false;
        /* init QDIO stuff */
        qeth_init_qdio_info(card);
        INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
@@ -5386,6 +5386,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
 
+static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
+{
+       if (!cmd->hdr.return_code)
+               cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+       return cmd->hdr.return_code;
+}
+
 int qeth_setassparms_cb(struct qeth_card *card,
                        struct qeth_reply *reply, unsigned long data)
 {
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
                                (struct qeth_checksum_cmd *)reply->param;
 
        QETH_CARD_TEXT(card, 4, "chkdoccb");
-       if (cmd->hdr.return_code)
+       if (qeth_setassparms_inspect_rc(cmd))
                return 0;
 
        memset(chksum_cb, 0, sizeof(*chksum_cb));
index 194ae9b577ccaeda712fece16868115c13fc38bb..e5833837b799eceb069e708fe88394731eecdda4 100644 (file)
@@ -82,7 +82,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
 int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
 void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions,
                        const u8 *);
-int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *);
+void qeth_l3_update_ipato(struct qeth_card *card);
 struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
 int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
 int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
index 6a73894b0cb51d2896bea853f6a33113fed98c13..ef0961e186869dd6b8ab06c0ebd901524bc41ffc 100644 (file)
@@ -164,8 +164,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
        }
 }
 
-int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
-                                               struct qeth_ipaddr *addr)
+static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
+                                            struct qeth_ipaddr *addr)
 {
        struct qeth_ipato_entry *ipatoe;
        u8 addr_bits[128] = {0, };
@@ -174,6 +174,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
 
        if (!card->ipato.enabled)
                return 0;
+       if (addr->type != QETH_IP_TYPE_NORMAL)
+               return 0;
 
        qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits,
                                  (addr->proto == QETH_PROT_IPV4)? 4:16);
@@ -290,8 +292,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
                memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr));
                addr->ref_counter = 1;
 
-               if (addr->type == QETH_IP_TYPE_NORMAL  &&
-                               qeth_l3_is_addr_covered_by_ipato(card, addr)) {
+               if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
                        QETH_CARD_TEXT(card, 2, "tkovaddr");
                        addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
                }
@@ -605,6 +606,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
 /*
  * IP address takeover related functions
  */
+
+/**
+ * qeth_l3_update_ipato() - Update 'takeover' property for all NORMAL IPs.
+ *
+ * Caller must hold ip_lock.
+ */
+void qeth_l3_update_ipato(struct qeth_card *card)
+{
+       struct qeth_ipaddr *addr;
+       unsigned int i;
+
+       hash_for_each(card->ip_htable, i, addr, hnode) {
+               if (addr->type != QETH_IP_TYPE_NORMAL)
+                       continue;
+               if (qeth_l3_is_addr_covered_by_ipato(card, addr))
+                       addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+               else
+                       addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+       }
+}
+
 static void qeth_l3_clear_ipato_list(struct qeth_card *card)
 {
        struct qeth_ipato_entry *ipatoe, *tmp;
@@ -616,6 +638,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card)
                kfree(ipatoe);
        }
 
+       qeth_l3_update_ipato(card);
        spin_unlock_bh(&card->ip_lock);
 }
 
@@ -640,8 +663,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
                }
        }
 
-       if (!rc)
+       if (!rc) {
                list_add_tail(&new->entry, &card->ipato.entries);
+               qeth_l3_update_ipato(card);
+       }
 
        spin_unlock_bh(&card->ip_lock);
 
@@ -664,6 +689,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card,
                            (proto == QETH_PROT_IPV4)? 4:16) &&
                    (ipatoe->mask_bits == mask_bits)) {
                        list_del(&ipatoe->entry);
+                       qeth_l3_update_ipato(card);
                        kfree(ipatoe);
                }
        }
index bd12fdf678bec32d7f40ff38e5c66a4c1e26f402..6ea2b528a64efbabee5782da7bf8c5d1bce3ff4e 100644 (file)
@@ -370,8 +370,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
 {
        struct qeth_card *card = dev_get_drvdata(dev);
-       struct qeth_ipaddr *addr;
-       int i, rc = 0;
+       bool enable;
+       int rc = 0;
 
        if (!card)
                return -EINVAL;
@@ -384,25 +384,18 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
        }
 
        if (sysfs_streq(buf, "toggle")) {
-               card->ipato.enabled = (card->ipato.enabled)? 0 : 1;
-       } else if (sysfs_streq(buf, "1")) {
-               card->ipato.enabled = 1;
-               hash_for_each(card->ip_htable, i, addr, hnode) {
-                               if ((addr->type == QETH_IP_TYPE_NORMAL) &&
-                               qeth_l3_is_addr_covered_by_ipato(card, addr))
-                                       addr->set_flags |=
-                                       QETH_IPA_SETIP_TAKEOVER_FLAG;
-                       }
-       } else if (sysfs_streq(buf, "0")) {
-               card->ipato.enabled = 0;
-               hash_for_each(card->ip_htable, i, addr, hnode) {
-                       if (addr->set_flags &
-                       QETH_IPA_SETIP_TAKEOVER_FLAG)
-                               addr->set_flags &=
-                               ~QETH_IPA_SETIP_TAKEOVER_FLAG;
-                       }
-       } else
+               enable = !card->ipato.enabled;
+       } else if (kstrtobool(buf, &enable)) {
                rc = -EINVAL;
+               goto out;
+       }
+
+       if (card->ipato.enabled != enable) {
+               card->ipato.enabled = enable;
+               spin_lock_bh(&card->ip_lock);
+               qeth_l3_update_ipato(card);
+               spin_unlock_bh(&card->ip_lock);
+       }
 out:
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
@@ -428,20 +421,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev,
                                const char *buf, size_t count)
 {
        struct qeth_card *card = dev_get_drvdata(dev);
+       bool invert;
        int rc = 0;
 
        if (!card)
                return -EINVAL;
 
        mutex_lock(&card->conf_mutex);
-       if (sysfs_streq(buf, "toggle"))
-               card->ipato.invert4 = (card->ipato.invert4)? 0 : 1;
-       else if (sysfs_streq(buf, "1"))
-               card->ipato.invert4 = 1;
-       else if (sysfs_streq(buf, "0"))
-               card->ipato.invert4 = 0;
-       else
+       if (sysfs_streq(buf, "toggle")) {
+               invert = !card->ipato.invert4;
+       } else if (kstrtobool(buf, &invert)) {
                rc = -EINVAL;
+               goto out;
+       }
+
+       if (card->ipato.invert4 != invert) {
+               card->ipato.invert4 = invert;
+               spin_lock_bh(&card->ip_lock);
+               qeth_l3_update_ipato(card);
+               spin_unlock_bh(&card->ip_lock);
+       }
+out:
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
 }
@@ -607,20 +607,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t count)
 {
        struct qeth_card *card = dev_get_drvdata(dev);
+       bool invert;
        int rc = 0;
 
        if (!card)
                return -EINVAL;
 
        mutex_lock(&card->conf_mutex);
-       if (sysfs_streq(buf, "toggle"))
-               card->ipato.invert6 = (card->ipato.invert6)? 0 : 1;
-       else if (sysfs_streq(buf, "1"))
-               card->ipato.invert6 = 1;
-       else if (sysfs_streq(buf, "0"))
-               card->ipato.invert6 = 0;
-       else
+       if (sysfs_streq(buf, "toggle")) {
+               invert = !card->ipato.invert6;
+       } else if (kstrtobool(buf, &invert)) {
                rc = -EINVAL;
+               goto out;
+       }
+
+       if (card->ipato.invert6 != invert) {
+               card->ipato.invert6 = invert;
+               spin_lock_bh(&card->ip_lock);
+               qeth_l3_update_ipato(card);
+               spin_unlock_bh(&card->ip_lock);
+       }
+out:
        mutex_unlock(&card->conf_mutex);
        return rc ? rc : count;
 }
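
All three store handlers now share one shape: accept the "toggle" keyword, hand everything else to kstrtobool(), and only take ip_lock and re-run qeth_l3_update_ipato() when the value actually changes. A rough user-space sketch of that control flow (parse_bool() below only approximates kstrtobool(), and the static flag stands in for card->ipato.enabled):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool ipato_enabled;	/* stands in for card->ipato.enabled */

/* Crude approximation of kstrtobool(): 1/0, y/n, on/off. */
static int parse_bool(const char *s, bool *res)
{
	if (!strcmp(s, "1") || !strcmp(s, "y") || !strcmp(s, "on"))
		*res = true;
	else if (!strcmp(s, "0") || !strcmp(s, "n") || !strcmp(s, "off"))
		*res = false;
	else
		return -1;
	return 0;
}

static int store_enable(const char *buf)
{
	bool enable;

	if (!strcmp(buf, "toggle"))
		enable = !ipato_enabled;
	else if (parse_bool(buf, &enable))
		return -1;	/* -EINVAL in the driver */

	if (ipato_enabled != enable) {
		ipato_enabled = enable;
		/* the driver re-runs qeth_l3_update_ipato() under ip_lock here */
		puts("takeover flags re-evaluated");
	}
	return 0;
}

int main(void)
{
	store_enable("1");	/* enables, re-evaluates */
	store_enable("1");	/* no change, nothing to do */
	store_enable("toggle");	/* disables, re-evaluates */
	store_enable("bogus");	/* rejected */
	return 0;
}
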
index 6e3d81969a77cc895580f79fa8e3aaa3b8bb4fee..d52265416da2af0da11cca770304f33ab203ad20 100644 (file)
@@ -1725,6 +1725,7 @@ struct aac_dev
 #define FIB_CONTEXT_FLAG_NATIVE_HBA            (0x00000010)
 #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF        (0x00000020)
 #define FIB_CONTEXT_FLAG_SCSI_CMD      (0x00000040)
+#define FIB_CONTEXT_FLAG_EH_RESET      (0x00000080)
 
 /*
  *     Define the command values
index bec9f3193f607c0aa1ca5f728f5eac85de518d61..80a8cb26cdea43c8252bc5afe68b2b72540ff045 100644 (file)
@@ -2482,8 +2482,8 @@ int aac_command_thread(void *data)
                        /* Synchronize our watches */
                        if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec)
                         && (now.tv_nsec > (NSEC_PER_SEC / HZ)))
-                               difference = (((NSEC_PER_SEC - now.tv_nsec) * HZ)
-                                 + NSEC_PER_SEC / 2) / NSEC_PER_SEC;
+                               difference = HZ + HZ / 2 -
+                                            now.tv_nsec / (NSEC_PER_SEC / HZ);
                        else {
                                if (now.tv_nsec > NSEC_PER_SEC / 2)
                                        ++now.tv_sec;
@@ -2507,6 +2507,10 @@ int aac_command_thread(void *data)
                if (kthread_should_stop())
                        break;
 
+               /*
+                * We probably want usleep_range() here instead of the
+                * jiffies computation.
+                */
                schedule_timeout(difference);
 
                if (kthread_should_stop())
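
The reworked interval avoids the wide (NSEC_PER_SEC - tv_nsec) * HZ product: HZ + HZ/2 - tv_nsec / (NSEC_PER_SEC / HZ) subtracts the jiffies already elapsed in the current second from one and a half seconds' worth of ticks, using only small intermediate values. A quick standalone check of the integer math (HZ = 250 is an assumption here; the real value is configuration-dependent):

#include <stdio.h>

#define NSEC_PER_SEC	1000000000L
#define HZ		250	/* assumed for the example */

int main(void)
{
	long tv_nsec = 600000000L;			/* 0.6 s into the current second */
	long elapsed = tv_nsec / (NSEC_PER_SEC / HZ);	/* 150 jiffies elapsed */
	long difference = HZ + HZ / 2 - elapsed;	/* 250 + 125 - 150 = 225 */

	printf("sleep %ld jiffies (%.2f s)\n", difference, (double)difference / HZ);
	return 0;
}
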
index bdf127aaab41d814e2337d2944166a0498bf1a66..d55332de08f91ad8e54e1296867569a8fa109a34 100644 (file)
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
                        info = &aac->hba_map[bus][cid];
                        if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
                            info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
-                               fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+                               fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
                                cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
                        }
                }
index 72ca2a2e08e259b70be45e8880354bf8291ef9c4..b2fa195adc7a3a6e405a6c23132060952916138f 100644 (file)
@@ -3135,7 +3135,8 @@ bfad_im_bsg_vendor_request(struct bsg_job *job)
        struct fc_bsg_request *bsg_request = job->request;
        struct fc_bsg_reply *bsg_reply = job->reply;
        uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0];
-       struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job));
+       struct Scsi_Host *shost = fc_bsg_to_shost(job);
+       struct bfad_im_port_s *im_port = bfad_get_im_port(shost);
        struct bfad_s *bfad = im_port->bfad;
        void *payload_kbuf;
        int rc = -EINVAL;
@@ -3350,7 +3351,8 @@ int
 bfad_im_bsg_els_ct_request(struct bsg_job *job)
 {
        struct bfa_bsg_data *bsg_data;
-       struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job));
+       struct Scsi_Host *shost = fc_bsg_to_shost(job);
+       struct bfad_im_port_s *im_port = bfad_get_im_port(shost);
        struct bfad_s *bfad = im_port->bfad;
        bfa_bsg_fcpt_t *bsg_fcpt;
        struct bfad_fcxp    *drv_fcxp;
index 24e657a4ec80df3caf82eff0b284288acc875a28..c05d6e91e4bde9cde7e9146ad48bae69727d476f 100644 (file)
@@ -546,6 +546,7 @@ int
 bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
                        struct device *dev)
 {
+       struct bfad_im_port_pointer *im_portp;
        int error = 1;
 
        mutex_lock(&bfad_mutex);
@@ -564,7 +565,8 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port,
                goto out_free_idr;
        }
 
-       im_port->shost->hostdata[0] = (unsigned long)im_port;
+       im_portp = shost_priv(im_port->shost);
+       im_portp->p = im_port;
        im_port->shost->unique_id = im_port->idr_id;
        im_port->shost->this_id = -1;
        im_port->shost->max_id = MAX_FCP_TARGET;
@@ -748,7 +750,7 @@ bfad_scsi_host_alloc(struct bfad_im_port_s *im_port, struct bfad_s *bfad)
 
        sht->sg_tablesize = bfad->cfg_data.io_max_sge;
 
-       return scsi_host_alloc(sht, sizeof(unsigned long));
+       return scsi_host_alloc(sht, sizeof(struct bfad_im_port_pointer));
 }
 
 void
index c81ec2a77ef5034d3fdd3c70f1466002e012be10..06ce4ba2b7bc9e6e562a20d98de2a6a99fff93d1 100644 (file)
@@ -69,6 +69,16 @@ struct bfad_im_port_s {
        struct fc_vport *fc_vport;
 };
 
+struct bfad_im_port_pointer {
+       struct bfad_im_port_s *p;
+};
+
+static inline struct bfad_im_port_s *bfad_get_im_port(struct Scsi_Host *host)
+{
+       struct bfad_im_port_pointer *im_portp = shost_priv(host);
+       return im_portp->p;
+}
+
 enum bfad_itnim_state {
        ITNIM_STATE_NONE,
        ITNIM_STATE_ONLINE,
index 5da46052e179c200a8e476f38551e470f6dd9173..21be672679fb5026120049b1609569bf8d25259a 100644 (file)
@@ -904,10 +904,14 @@ static void fc_lport_recv_els_req(struct fc_lport *lport,
                case ELS_FLOGI:
                        if (!lport->point_to_multipoint)
                                fc_lport_recv_flogi_req(lport, fp);
+                       else
+                               fc_rport_recv_req(lport, fp);
                        break;
                case ELS_LOGO:
                        if (fc_frame_sid(fp) == FC_FID_FLOGI)
                                fc_lport_recv_logo_req(lport, fp);
+                       else
+                               fc_rport_recv_req(lport, fp);
                        break;
                case ELS_RSCN:
                        lport->tt.disc_recv_req(lport, fp);
index ca1566237ae7744703a9e3ae3533138c4e81af53..3183d63de4dab7f5b000f2b6da856eea70c6b900 100644 (file)
@@ -2145,7 +2145,7 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
                struct sas_rphy *rphy)
 {
        struct domain_device *dev;
-       unsigned int reslen = 0;
+       unsigned int rcvlen = 0;
        int ret = -EINVAL;
 
        /* no rphy means no smp target support (ie aic94xx host) */
@@ -2179,12 +2179,12 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
 
        ret = smp_execute_task_sg(dev, job->request_payload.sg_list,
                        job->reply_payload.sg_list);
-       if (ret > 0) {
-               /* positive number is the untransferred residual */
-               reslen = ret;
+       if (ret >= 0) {
+               /* bsg_job_done() requires the length received */
+               rcvlen = job->reply_payload.payload_len - ret;
                ret = 0;
        }
 
 out:
-       bsg_job_done(job, ret, reslen);
+       bsg_job_done(job, ret, rcvlen);
 }
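
The SMP execution helper returns the untransferred residual, while bsg_job_done() wants the number of reply bytes actually received, hence the subtraction from the reply payload length. A trivial standalone illustration (the sizes are made up):

#include <stdio.h>

int main(void)
{
	unsigned int payload_len = 1024;	/* job->reply_payload.payload_len */
	unsigned int residual = 64;		/* non-negative return value */
	unsigned int rcvlen = payload_len - residual;

	printf("received %u of %u bytes\n", rcvlen, payload_len);	/* 960 of 1024 */
	return 0;
}
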
index 56faeb049b4ac50ec04783349fcea9b79c20583f..87c08ff37dddff46fed7841247a41f585ebc8f1d 100644 (file)
@@ -753,12 +753,12 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
        drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys);
        rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe);
        if (rc < 0) {
-               (rqbp->rqb_free_buffer)(phba, rqb_entry);
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "6409 Cannot post to RQ %d: %x %x\n",
                                rqb_entry->hrq->queue_id,
                                rqb_entry->hrq->host_index,
                                rqb_entry->hrq->hba_index);
+               (rqbp->rqb_free_buffer)(phba, rqb_entry);
        } else {
                list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list);
                rqbp->buffer_count++;
index a4f28b7e4c65df81ef583eab878a3aa9fc45e0e4..e18877177f1b52d9c43ad3b991b858c80a6cc079 100644 (file)
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
                return req;
 
        for_each_bio(bio) {
-               ret = blk_rq_append_bio(req, bio);
+               struct bio *bounce_bio = bio;
+
+               ret = blk_rq_append_bio(req, &bounce_bio);
                if (ret)
                        return ERR_PTR(ret);
        }
index 01f08c03f2c185dc3f7e1d423c3459da95fb6cbb..c3765d29fd3ff8167a647563b43be57e885dd6a9 100644 (file)
@@ -8,9 +8,11 @@ void scsi_show_rq(struct seq_file *m, struct request *rq)
 {
        struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req);
        int msecs = jiffies_to_msecs(jiffies - cmd->jiffies_at_alloc);
-       char buf[80];
+       const u8 *const cdb = READ_ONCE(cmd->cmnd);
+       char buf[80] = "(?)";
 
-       __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len);
+       if (cdb)
+               __scsi_format_command(buf, sizeof(buf), cdb, cmd->cmd_len);
        seq_printf(m, ", .cmd=%s, .retries=%d, allocated %d.%03d s ago", buf,
                   cmd->retries, msecs / 1000, msecs % 1000);
 }
index 78d4aa8df675a1671df5daf12dfc7d9000f3cbe5..dfb8da83fa504c979e9ba0639b32a4cae2c8969c 100644 (file)
@@ -34,7 +34,6 @@ struct scsi_dev_info_list_table {
 };
 
 
-static const char spaces[] = "                "; /* 16 of them */
 static blist_flags_t scsi_default_dev_flags;
 static LIST_HEAD(scsi_dev_info_list);
 static char scsi_dev_flags[256];
@@ -298,20 +297,13 @@ static void scsi_strcpy_devinfo(char *name, char *to, size_t to_length,
        size_t from_length;
 
        from_length = strlen(from);
-       strncpy(to, from, min(to_length, from_length));
-       if (from_length < to_length) {
-               if (compatible) {
-                       /*
-                        * NUL terminate the string if it is short.
-                        */
-                       to[from_length] = '\0';
-               } else {
-                       /*
-                        * space pad the string if it is short.
-                        */
-                       strncpy(&to[from_length], spaces,
-                               to_length - from_length);
-               }
+       /* This zero-pads the destination */
+       strncpy(to, from, to_length);
+       if (from_length < to_length && !compatible) {
+               /*
+                * space pad the string if it is short.
+                */
+               memset(&to[from_length], ' ', to_length - from_length);
        }
        if (from_length > to_length)
                 printk(KERN_WARNING "%s: %s string '%s' is too long\n",
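
The simplification relies on strncpy() zero-filling the rest of the destination whenever the source is shorter, so only the non-compatible case still needs explicit space padding. A small standalone demonstration of both behaviours (field width chosen arbitrarily):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char to[8];
	const char *from = "IBM";	/* shorter than the 8-byte field */

	/* strncpy() copies "IBM" and zero-fills the remaining five bytes. */
	strncpy(to, from, sizeof(to));
	printf("compatible:     \"%s\"\n", to);

	/* Non-compatible entries get space padding instead, as in the driver. */
	memset(&to[strlen(from)], ' ', sizeof(to) - strlen(from));
	printf("non-compatible: \"%.8s\"\n", to);
	return 0;
}
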
@@ -382,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
                            model, compatible);
 
        if (strflags)
-               devinfo->flags = simple_strtoul(strflags, NULL, 0);
-       else
-               devinfo->flags = flags;
-
+               flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+       devinfo->flags = flags;
        devinfo->compatible = compatible;
 
        if (compatible)
@@ -458,7 +448,8 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
                        /*
                         * vendor strings must be an exact match
                         */
-                       if (vmax != strlen(devinfo->vendor) ||
+                       if (vmax != strnlen(devinfo->vendor,
+                                           sizeof(devinfo->vendor)) ||
                            memcmp(devinfo->vendor, vskip, vmax))
                                continue;
 
@@ -466,7 +457,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor,
                         * @model specifies the full string, and
                         * must be larger or equal to devinfo->model
                         */
-                       mlen = strlen(devinfo->model);
+                       mlen = strnlen(devinfo->model, sizeof(devinfo->model));
                        if (mmax < mlen || memcmp(devinfo->model, mskip, mlen))
                                continue;
                        return devinfo;
index 00742c50cd44ed6e452dc50b4eb9bf5bd3f36e80..d9ca1dfab154c83f79ba1dab210f1a3521d1ea5d 100644 (file)
@@ -1967,6 +1967,8 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
 out_put_device:
        put_device(&sdev->sdev_gendev);
 out:
+       if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
+               blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
        return false;
 }
 
index be5e919db0e8cd9e713727a91bc46923673ea556..0880d975eed3a56c58d27172bfd18c1a59da5d4b 100644 (file)
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
-               int *bflags, int async)
+               blist_flags_t *bflags, int async)
 {
        int ret;
 
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
  *   - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_probe_and_add_lun(struct scsi_target *starget,
-                                 u64 lun, int *bflagsp,
+                                 u64 lun, blist_flags_t *bflagsp,
                                  struct scsi_device **sdevp,
                                  enum scsi_scan_mode rescan,
                                  void *hostdata)
 {
        struct scsi_device *sdev;
        unsigned char *result;
-       int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
+       blist_flags_t bflags;
+       int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
        struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
        /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
  *     Modifies sdevscan->lun.
  **/
 static void scsi_sequential_lun_scan(struct scsi_target *starget,
-                                    int bflags, int scsi_level,
+                                    blist_flags_t bflags, int scsi_level,
                                     enum scsi_scan_mode rescan)
 {
        uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
  *     0: scan completed (or no memory, so further scanning is futile)
  *     1: could not scan with REPORT LUN
  **/
-static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
+static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
                                enum scsi_scan_mode rescan)
 {
        unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
                unsigned int id, u64 lun, enum scsi_scan_mode rescan)
 {
        struct Scsi_Host *shost = dev_to_shost(parent);
-       int bflags = 0;
+       blist_flags_t bflags = 0;
        int res;
        struct scsi_target *starget;
 
index 50e7d7e4a86179b9a47d18569bb759be0b674b88..a9996c16f4ae63fc820065c75b68fb51c5a01ab4 100644 (file)
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
-#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name
+#define BLIST_FLAG_NAME(name)                                  \
+       [ilog2((__force unsigned int)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
        for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
                const char *name = NULL;
 
-               if (!(sdev->sdev_bflags & BIT(i)))
+               if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
                        continue;
                if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
                        name = sdev_bflags_name[i];
index d0219e36080c3b79109ac405eb0cd726545585fc..10ebb213ddb33e2920e2fe83e60cc712a50c3002 100644 (file)
 
 /* Our blacklist flags */
 enum {
-       SPI_BLIST_NOIUS = 0x1,
+       SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
 };
 
 /* blacklist table, modelled on scsi_devinfo.c */
 static struct {
        char *vendor;
        char *model;
-       unsigned flags;
+       blist_flags_t flags;
 } spi_static_device_list[] __initdata = {
        {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
        {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
 {
        struct scsi_device *sdev = to_scsi_device(dev);
        struct scsi_target *starget = sdev->sdev_target;
-       unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
-                                                     &sdev->inquiry[16],
-                                                     SCSI_DEVINFO_SPI);
+       blist_flags_t bflags;
+
+       bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
+                                            &sdev->inquiry[16],
+                                            SCSI_DEVINFO_SPI);
 
        /* Populate the target capability fields with the values
         * gleaned from the device inquiry */
index 24fe685227169d1be705e9ddc1d8861cb962d2bf..a028ab3322a9a4ed3b37530c19268dc90d9098f4 100644 (file)
@@ -1312,6 +1312,7 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
        struct request *rq = SCpnt->request;
+       u8 *cmnd;
 
        if (SCpnt->flags & SCMD_ZONE_WRITE_LOCK)
                sd_zbc_write_unlock_zone(SCpnt);
@@ -1320,9 +1321,10 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
                __free_page(rq->special_vec.bv_page);
 
        if (SCpnt->cmnd != scsi_req(rq)->cmd) {
-               mempool_free(SCpnt->cmnd, sd_cdb_pool);
+               cmnd = SCpnt->cmnd;
                SCpnt->cmnd = NULL;
                SCpnt->cmd_len = 0;
+               mempool_free(cmnd, sd_cdb_pool);
        }
 }
 
index 77fe55ce790c61a8835c4e2338a36be43dafcbac..d65345312527ce450b539964aa0465e1e6787b44 100644 (file)
@@ -79,6 +79,7 @@
 #define A3700_SPI_BYTE_LEN             BIT(5)
 #define A3700_SPI_CLK_PRESCALE         BIT(0)
 #define A3700_SPI_CLK_PRESCALE_MASK    (0x1f)
+#define A3700_SPI_CLK_EVEN_OFFS                (0x10)
 
 #define A3700_SPI_WFIFO_THRS_BIT       28
 #define A3700_SPI_RFIFO_THRS_BIT       24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
 
        prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
 
+       /*
+        * Prescaler values above 15 can only be programmed in steps of 2:
+        * the range starting at A3700_SPI_CLK_EVEN_OFFS encodes even
+        * divisors from 0 up to 30, and we only use it for 16 to 30.
+        */
+       if (prescale > 15)
+               prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
+
        val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
        val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
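
For divisors above 15 the hardware only supports even values, selected through the A3700_SPI_CLK_EVEN_OFFS range, so an odd request is rounded up to the next even divisor. A standalone sketch of the encoding (the clock and target rates are invented):

#include <stdio.h>

#define A3700_SPI_CLK_EVEN_OFFS	0x10
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static unsigned int encode_prescale(unsigned long clk_rate, unsigned long speed_hz)
{
	unsigned int prescale = DIV_ROUND_UP(clk_rate, speed_hz);

	/* Divisors above 15 exist only in steps of 2, via the even-offset range. */
	if (prescale > 15)
		prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
	return prescale;
}

int main(void)
{
	/* 200 MHz / 9 MHz -> divisor 23, bumped to 24, encoded as 0x10 + 12 = 0x1c */
	printf("prescale field = 0x%x\n", encode_prescale(200000000UL, 9000000UL));
	return 0;
}
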
 
index f95da364c2832b0142158e12c97648aab81b7165..66947097102370d0f54ba2559abddab1ac812a5d 100644 (file)
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
        pm_runtime_get_sync(&pdev->dev);
 
        /* reset the hardware and block queue progress */
-       spin_lock_irq(&as->lock);
        if (as->use_dma) {
                atmel_spi_stop_dma(master);
                atmel_spi_release_dma(master);
        }
 
+       spin_lock_irq(&as->lock);
        spi_writel(as, CR, SPI_BIT(SWRST));
        spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
        spi_readl(as, SR);
index 2ce875764ca646a2bdfb803cae33465ab8fa1786..0835a8d88fb8f85ab5ae44a4aa74d94121d19d87 100644 (file)
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
        /* Sets SPCMD */
        rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
-       /* Enables SPI function in master mode */
-       rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+       /* Sets RSPI mode */
+       rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
 
        return 0;
 }
index c5cd635c28f388bec2cfd47b9a6c6c9dcec9e046..41410031f8e99e6a1d54b8f94990df0133356ced 100644 (file)
@@ -525,7 +525,7 @@ err_free_master:
 
 static int sun4i_spi_remove(struct platform_device *pdev)
 {
-       pm_runtime_disable(&pdev->dev);
+       pm_runtime_force_suspend(&pdev->dev);
 
        return 0;
 }
index bc7100b93dfcf0c24213f479f9a5fffc41666315..e0b9fe1d0e37d98a7243ca35a56b1d62e024e8b3 100644 (file)
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
        while (remaining_words) {
                int n_words, tx_words, rx_words;
                u32 sr;
+               int stalled;
 
                n_words = min(remaining_words, xspi->buffer_size);
 
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 
                /* Read out all the data from the Rx FIFO */
                rx_words = n_words;
+               stalled = 10;
                while (rx_words) {
+                       if (rx_words == n_words && !(stalled--) &&
+                           !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+                           (sr & XSPI_SR_RX_EMPTY_MASK)) {
+                               dev_err(&spi->dev,
+                                       "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+                               xspi_init_hw(xspi);
+                               return -EIO;
+                       }
+
                        if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
                                xilinx_spi_rx(xspi);
                                rx_words--;
index 1799d3f26a9e29858e1a4362fe4fc491609750dd..2035835b62dcd797e41209acdf079f0daf9f4f8f 100644 (file)
@@ -1769,7 +1769,7 @@ static int ssi_ahash_import(struct ahash_request *req, const void *in)
        struct device *dev = drvdata_to_dev(ctx->drvdata);
        struct ahash_req_ctx *state = ahash_request_ctx(req);
        u32 tmp;
-       int rc;
+       int rc = 0;
 
        memcpy(&tmp, in, sizeof(u32));
        if (tmp != CC_EXPORT_MAGIC) {
index e69a2153c999c796602f2884137faff8bc1ef8ab..12c9df9cddde22bea46650ededda1ed4d3e057ae 100644 (file)
@@ -102,7 +102,7 @@ enum modulation rf69_get_modulation(struct spi_device *spi)
 
        currentValue = READ_REG(REG_DATAMODUL);
 
-       switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE >> 3) { // TODO improvement: change 3 to define
+       switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE) {
        case DATAMODUL_MODULATION_TYPE_OOK: return OOK;
        case DATAMODUL_MODULATION_TYPE_FSK: return FSK;
        default:                            return undefined;
index 7c69b4a9694d2016a8aac3b63a4b7d4399146688..0d99b242e82e3f84da25a47564f96db60be4b5f5 100644 (file)
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                        " %d i: %d bio: %p, allocating another"
                                        " bio\n", bio->bi_vcnt, i, bio);
 
-                               rc = blk_rq_append_bio(req, bio);
+                               rc = blk_rq_append_bio(req, &bio);
                                if (rc) {
                                        pr_err("pSCSI: failed to append bio\n");
                                        goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
        }
 
        if (bio) {
-               rc = blk_rq_append_bio(req, bio);
+               rc = blk_rq_append_bio(req, &bio);
                if (rc) {
                        pr_err("pSCSI: failed to append bio\n");
                        goto fail;
index 55b198ba629b33b4923247736be8fb743286c324..78e92d29f8d98777c1294292808b1a868dcfcb7f 100644 (file)
@@ -555,6 +555,9 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
        unsigned iad_num = 0;
 
        memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE);
+       nintf = nintf_orig = config->desc.bNumInterfaces;
+       config->desc.bNumInterfaces = 0;        // Adjusted later
+
        if (config->desc.bDescriptorType != USB_DT_CONFIG ||
            config->desc.bLength < USB_DT_CONFIG_SIZE ||
            config->desc.bLength > size) {
@@ -568,7 +571,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
        buffer += config->desc.bLength;
        size -= config->desc.bLength;
 
-       nintf = nintf_orig = config->desc.bNumInterfaces;
        if (nintf > USB_MAXINTERFACES) {
                dev_warn(ddev, "config %d has too many interfaces: %d, "
                    "using maximum allowed: %d\n",
index f66c94130cac0b49a02d7ca081568bd88367589c..31749c79045f3a95ed27b6b338f9b94baf7b8d03 100644 (file)
@@ -537,6 +537,7 @@ struct dwc2_core_params {
  *                       2 - Internal DMA
  * @power_optimized     Are power optimizations enabled?
  * @num_dev_ep          Number of device endpoints available
+ * @num_dev_in_eps      Number of device IN endpoints available
  * @num_dev_perio_in_ep Number of device periodic IN endpoints
  *                      available
  * @dev_token_q_depth   Device Mode IN Token Sequence Learning Queue
@@ -565,6 +566,7 @@ struct dwc2_core_params {
  *                       2 - 8 or 16 bits
  * @snpsid:             Value from SNPSID register
  * @dev_ep_dirs:        Direction of device endpoints (GHWCFG1)
+ * @g_tx_fifo_size[]   Power-on values of TxFIFO sizes
  */
 struct dwc2_hw_params {
        unsigned op_mode:3;
@@ -586,12 +588,14 @@ struct dwc2_hw_params {
        unsigned fs_phy_type:2;
        unsigned i2c_enable:1;
        unsigned num_dev_ep:4;
+       unsigned num_dev_in_eps:4;
        unsigned num_dev_perio_in_ep:4;
        unsigned total_fifo_size:16;
        unsigned power_optimized:1;
        unsigned utmi_phy_data_width:2;
        u32 snpsid;
        u32 dev_ep_dirs;
+       u32 g_tx_fifo_size[MAX_EPS_CHANNELS];
 };
 
 /* Size of control and EP0 buffers */
index 88529d0925039f53112458e59c208c057a32a608..e4c3ce0de5de11ba5532fb34c8b8d72e1fd98511 100644 (file)
@@ -195,55 +195,18 @@ int dwc2_hsotg_tx_fifo_count(struct dwc2_hsotg *hsotg)
 {
        if (hsotg->hw_params.en_multiple_tx_fifo)
                /* In dedicated FIFO mode we need count of IN EPs */
-               return (dwc2_readl(hsotg->regs + GHWCFG4)  &
-                       GHWCFG4_NUM_IN_EPS_MASK) >> GHWCFG4_NUM_IN_EPS_SHIFT;
+               return hsotg->hw_params.num_dev_in_eps;
        else
                /* In shared FIFO mode we need count of Periodic IN EPs */
                return hsotg->hw_params.num_dev_perio_in_ep;
 }
 
-/**
- * dwc2_hsotg_ep_info_size - return Endpoint Info Control block size in DWORDs
- */
-static int dwc2_hsotg_ep_info_size(struct dwc2_hsotg *hsotg)
-{
-       int val = 0;
-       int i;
-       u32 ep_dirs;
-
-       /*
-        * Don't need additional space for ep info control registers in
-        * slave mode.
-        */
-       if (!using_dma(hsotg)) {
-               dev_dbg(hsotg->dev, "Buffer DMA ep info size 0\n");
-               return 0;
-       }
-
-       /*
-        * Buffer DMA mode - 1 location per endpoit
-        * Descriptor DMA mode - 4 locations per endpoint
-        */
-       ep_dirs = hsotg->hw_params.dev_ep_dirs;
-
-       for (i = 0; i <= hsotg->hw_params.num_dev_ep; i++) {
-               val += ep_dirs & 3 ? 1 : 2;
-               ep_dirs >>= 2;
-       }
-
-       if (using_desc_dma(hsotg))
-               val = val * 4;
-
-       return val;
-}
-
 /**
  * dwc2_hsotg_tx_fifo_total_depth - return total FIFO depth available for
  * device mode TX FIFOs
  */
 int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
 {
-       int ep_info_size;
        int addr;
        int tx_addr_max;
        u32 np_tx_fifo_size;
@@ -252,8 +215,7 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
                                hsotg->params.g_np_tx_fifo_size);
 
        /* Get Endpoint Info Control block size in DWORDs. */
-       ep_info_size = dwc2_hsotg_ep_info_size(hsotg);
-       tx_addr_max = hsotg->hw_params.total_fifo_size - ep_info_size;
+       tx_addr_max = hsotg->hw_params.total_fifo_size;
 
        addr = hsotg->params.g_rx_fifo_size + np_tx_fifo_size;
        if (tx_addr_max <= addr)
index ef73af6e03a98828b7fadb46bd32ee291fd6302f..03fd20f0b49613aaffba14aa117e48a7781391a5 100644 (file)
@@ -484,8 +484,7 @@ static void dwc2_check_param_tx_fifo_sizes(struct dwc2_hsotg *hsotg)
        }
 
        for (fifo = 1; fifo <= fifo_count; fifo++) {
-               dptxfszn = (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) &
-                       FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT;
+               dptxfszn = hsotg->hw_params.g_tx_fifo_size[fifo];
 
                if (hsotg->params.g_tx_fifo_size[fifo] < min ||
                    hsotg->params.g_tx_fifo_size[fifo] >  dptxfszn) {
@@ -609,6 +608,7 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
        struct dwc2_hw_params *hw = &hsotg->hw_params;
        bool forced;
        u32 gnptxfsiz;
+       int fifo, fifo_count;
 
        if (hsotg->dr_mode == USB_DR_MODE_HOST)
                return;
@@ -617,6 +617,14 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 
        gnptxfsiz = dwc2_readl(hsotg->regs + GNPTXFSIZ);
 
+       fifo_count = dwc2_hsotg_tx_fifo_count(hsotg);
+
+       for (fifo = 1; fifo <= fifo_count; fifo++) {
+               hw->g_tx_fifo_size[fifo] =
+                       (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) &
+                        FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT;
+       }
+
        if (forced)
                dwc2_clear_force_mode(hsotg);
 
@@ -661,14 +669,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
        hwcfg4 = dwc2_readl(hsotg->regs + GHWCFG4);
        grxfsiz = dwc2_readl(hsotg->regs + GRXFSIZ);
 
-       /*
-        * Host specific hardware parameters. Reading these parameters
-        * requires the controller to be in host mode. The mode will
-        * be forced, if necessary, to read these values.
-        */
-       dwc2_get_host_hwparams(hsotg);
-       dwc2_get_dev_hwparams(hsotg);
-
        /* hwcfg1 */
        hw->dev_ep_dirs = hwcfg1;
 
@@ -711,6 +711,8 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
        hw->en_multiple_tx_fifo = !!(hwcfg4 & GHWCFG4_DED_FIFO_EN);
        hw->num_dev_perio_in_ep = (hwcfg4 & GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK) >>
                                  GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT;
+       hw->num_dev_in_eps = (hwcfg4 & GHWCFG4_NUM_IN_EPS_MASK) >>
+                            GHWCFG4_NUM_IN_EPS_SHIFT;
        hw->dma_desc_enable = !!(hwcfg4 & GHWCFG4_DESC_DMA);
        hw->power_optimized = !!(hwcfg4 & GHWCFG4_POWER_OPTIMIZ);
        hw->utmi_phy_data_width = (hwcfg4 & GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK) >>
@@ -719,6 +721,13 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
        /* fifo sizes */
        hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >>
                                GRXFSIZ_DEPTH_SHIFT;
+       /*
+        * Host specific hardware parameters. Reading these parameters
+        * requires the controller to be in host mode. The mode will
+        * be forced, if necessary, to read these values.
+        */
+       dwc2_get_host_hwparams(hsotg);
+       dwc2_get_dev_hwparams(hsotg);
 
        return 0;
 }
index c4a4d7bd27660225442e732e6be994c333b9b91b..7ae0eefc7cc7daf0382c7aeaa56c14659f025ffb 100644 (file)
@@ -51,8 +51,10 @@ static int dwc3_of_simple_clk_init(struct dwc3_of_simple *simple, int count)
 
                clk = of_clk_get(np, i);
                if (IS_ERR(clk)) {
-                       while (--i >= 0)
+                       while (--i >= 0) {
+                               clk_disable_unprepare(simple->clks[i]);
                                clk_put(simple->clks[i]);
+                       }
                        return PTR_ERR(clk);
                }
 
@@ -203,6 +205,7 @@ static struct platform_driver dwc3_of_simple_driver = {
        .driver         = {
                .name   = "dwc3-of-simple",
                .of_match_table = of_dwc3_simple_match,
+               .pm     = &dwc3_of_simple_dev_pm_ops,
        },
 };
 
index 981fd986cf824804b752e64018289c79f191d646..639dd1b163a0e19e502ff2274c73101e3efaa777 100644 (file)
@@ -259,7 +259,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd,
 {
        const struct usb_endpoint_descriptor *desc = dep->endpoint.desc;
        struct dwc3             *dwc = dep->dwc;
-       u32                     timeout = 500;
+       u32                     timeout = 1000;
        u32                     reg;
 
        int                     cmd_status = 0;
@@ -912,7 +912,7 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
                         */
                        if (speed == USB_SPEED_HIGH) {
                                struct usb_ep *ep = &dep->endpoint;
-                               unsigned int mult = ep->mult - 1;
+                               unsigned int mult = 2;
                                unsigned int maxp = usb_endpoint_maxp(ep->desc);
 
                                if (length <= (2 * maxp))
index 0a19a76645adee42100e99171570f10c3de3739f..31cce7805eb2e93706468409a39f287fdebd5f5a 100644 (file)
@@ -508,8 +508,8 @@ choice
          controller, and the relevant drivers for each function declared
          by the device.
 
-endchoice
-
 source "drivers/usb/gadget/legacy/Kconfig"
 
+endchoice
+
 endif # USB_GADGET
index 9570bbeced4f86e541a690dfe2112cff12a785c7..784bf86dad4fcb0c8f9d10f2bd8652ee6a8d38d6 100644 (file)
 # both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG).
 #
 
-menuconfig USB_GADGET_LEGACY
-       bool "Legacy USB Gadget Support"
-       help
-          Legacy USB gadgets are USB gadgets that do not use the USB gadget
-          configfs interface.
-
-if USB_GADGET_LEGACY
-
 config USB_ZERO
        tristate "Gadget Zero (DEVELOPMENT)"
        select USB_LIBCOMPOSITE
@@ -487,7 +479,7 @@ endif
 # or video class gadget drivers), or specific hardware, here.
 config USB_G_WEBCAM
        tristate "USB Webcam Gadget"
-       depends on VIDEO_DEV
+       depends on VIDEO_V4L2
        select USB_LIBCOMPOSITE
        select VIDEOBUF2_VMALLOC
        select USB_F_UVC
@@ -498,5 +490,3 @@ config USB_G_WEBCAM
 
          Say "y" to link the driver statically, or "m" to build a
          dynamically linked module called "g_webcam".
-
-endif
index 15f7d422885f0c95d887d22eca4baf85a509ebf3..3a29b32a3bd06c43376bb493ae6f08a48e6f3174 100644 (file)
@@ -971,10 +971,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
                return 0;
        }
 
-       xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags);
-       if (!xhci->devs[slot_id])
+       dev = kzalloc(sizeof(*dev), flags);
+       if (!dev)
                return 0;
-       dev = xhci->devs[slot_id];
 
        /* Allocate the (output) device context that will be used in the HC. */
        dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags);
@@ -1015,9 +1014,17 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 
        trace_xhci_alloc_virt_device(dev);
 
+       xhci->devs[slot_id] = dev;
+
        return 1;
 fail:
-       xhci_free_virt_device(xhci, slot_id);
+
+       if (dev->in_ctx)
+               xhci_free_container_ctx(xhci, dev->in_ctx);
+       if (dev->out_ctx)
+               xhci_free_container_ctx(xhci, dev->out_ctx);
+       kfree(dev);
+
        return 0;
 }
 
index 6eb87c6e4d2420a3b19a6c41c1b797fac5e642dc..c5cbc685c6915ce9e5b6f884ec5cdd7ea0306dc4 100644 (file)
@@ -3112,7 +3112,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
 {
        u32 maxp, total_packet_count;
 
-       /* MTK xHCI is mostly 0.97 but contains some features from 1.0 */
+       /* MTK xHCI 0.96 contains some features from 1.0 */
        if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST))
                return ((td_total_len - transferred) >> 10);
 
@@ -3121,8 +3121,8 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
            trb_buff_len == td_total_len)
                return 0;
 
-       /* for MTK xHCI, TD size doesn't include this TRB */
-       if (xhci->quirks & XHCI_MTK_HOST)
+       /* for MTK xHCI 0.96, TD size include this TRB, but not in 1.x */
+       if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100))
                trb_buff_len = 0;
 
        maxp = usb_endpoint_maxp(&urb->ep->desc);
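
Only the MTK handling changes here; for pre-1.0 controllers the TD size field stays what the shift computes, the remaining TD length in 1 KB units. A standalone check of that shift (lengths are arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned int td_total_len = 65536;	/* whole TD */
	unsigned int transferred = 16384;	/* already queued */

	/* Pre-1.0 xHCI: TD size = remaining bytes expressed in 1 KB units. */
	unsigned int td_size = (td_total_len - transferred) >> 10;

	printf("TD size field = %u\n", td_size);	/* 49152 >> 10 = 48 */
	return 0;
}
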
index 0397606a211b2a62ab58067334784753a4e10668..6c036de63272b432b8b88255a2fd97db587187e6 100644 (file)
@@ -284,7 +284,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci)
                        musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
                        portstate(musb->port1_status |= USB_PORT_STAT_POWER);
                        del_timer(&musb->dev_timer);
-               } else {
+               } else if (!(musb->int_usb & MUSB_INTR_BABBLE)) {
+                       /*
+                        * When a babble condition occurs, a drvvbus interrupt
+                        * is also generated. Ignore this drvvbus interrupt
+                        * and let the babble interrupt handler recover the
+                        * controller; otherwise, the host-mode flag is lost
+                        * due to the MUSB_DEV_MODE() call below and the
+                        * babble recovery logic will not be called.
+                        */
                        musb->is_active = 0;
                        MUSB_DEV_MODE(musb);
                        otg->default_a = 0;
index 2968046e7c059229cd8cec3611e150ccc3e70f23..f72d045ee9ef11940c056921d2760c649bc34d17 100644 (file)
@@ -2100,6 +2100,13 @@ UNUSUAL_DEV(  0x152d, 0x0567, 0x0114, 0x0116,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_BROKEN_FUA ),
 
+/* Reported by David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+               "JMicron",
+               "JMS567",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_BROKEN_FUA),
+
 /*
  * Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br>
  * JMicron responds to USN and several other SCSI ioctls with a
index d520374a824e19796862280dcc17832a0d66321c..e6127fb21c123f397099dcae0ba5947ca7772061 100644 (file)
@@ -129,6 +129,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
 
+/* Reported-by: David Kozub <zub@linux.fjfi.cvut.cz> */
+UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999,
+               "JMicron",
+               "JMS567",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_BROKEN_FUA),
+
 /* Reported-by: Hans de Goede <hdegoede@redhat.com> */
 UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
                "VIA",
index 536e037f541faa01f16188e45e1afe130abc57ea..493ac2928391accc4984e3ceddbfdadd2690a26c 100644 (file)
@@ -322,23 +322,34 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev,
        return priv;
 }
 
-static int get_pipe(struct stub_device *sdev, int epnum, int dir)
+static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
 {
        struct usb_device *udev = sdev->udev;
        struct usb_host_endpoint *ep;
        struct usb_endpoint_descriptor *epd = NULL;
+       int epnum = pdu->base.ep;
+       int dir = pdu->base.direction;
+
+       if (epnum < 0 || epnum > 15)
+               goto err_ret;
 
        if (dir == USBIP_DIR_IN)
                ep = udev->ep_in[epnum & 0x7f];
        else
                ep = udev->ep_out[epnum & 0x7f];
-       if (!ep) {
-               dev_err(&sdev->udev->dev, "no such endpoint?, %d\n",
-                       epnum);
-               BUG();
-       }
+       if (!ep)
+               goto err_ret;
 
        epd = &ep->desc;
+
+       /* validate transfer_buffer_length */
+       if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) {
+               dev_err(&sdev->udev->dev,
+                       "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n",
+                       pdu->u.cmd_submit.transfer_buffer_length);
+               return -1;
+       }
+
        if (usb_endpoint_xfer_control(epd)) {
                if (dir == USBIP_DIR_OUT)
                        return usb_sndctrlpipe(udev, epnum);
@@ -361,15 +372,31 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir)
        }
 
        if (usb_endpoint_xfer_isoc(epd)) {
+               /* validate packet size and number of packets */
+               unsigned int maxp, packets, bytes;
+
+               maxp = usb_endpoint_maxp(epd);
+               maxp *= usb_endpoint_maxp_mult(epd);
+               bytes = pdu->u.cmd_submit.transfer_buffer_length;
+               packets = DIV_ROUND_UP(bytes, maxp);
+
+               if (pdu->u.cmd_submit.number_of_packets < 0 ||
+                   pdu->u.cmd_submit.number_of_packets > packets) {
+                       dev_err(&sdev->udev->dev,
+                               "CMD_SUBMIT: isoc invalid num packets %d\n",
+                               pdu->u.cmd_submit.number_of_packets);
+                       return -1;
+               }
                if (dir == USBIP_DIR_OUT)
                        return usb_sndisocpipe(udev, epnum);
                else
                        return usb_rcvisocpipe(udev, epnum);
        }
 
+err_ret:
        /* NOT REACHED */
-       dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum);
-       return 0;
+       dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum);
+       return -1;
 }
 
 static void masking_bogus_flags(struct urb *urb)
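
For isochronous submissions the client-supplied number_of_packets is now bounded by how many packets the transfer length could possibly need, DIV_ROUND_UP(length, maxp * mult). A standalone check of that bound (the endpoint parameters are invented):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int maxp = 1024, mult = 3;	/* e.g. a high-bandwidth isoc endpoint */
	unsigned int bytes = 30000;		/* transfer_buffer_length from the PDU */
	unsigned int limit = DIV_ROUND_UP(bytes, maxp * mult);
	unsigned int claimed = 12;		/* number_of_packets from the client */

	/* ceil(30000 / 3072) = 10, so a claim of 12 packets is rejected. */
	printf("limit %u, claimed %u -> %s\n", limit, claimed,
	       claimed > limit ? "reject" : "accept");
	return 0;
}
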
@@ -433,7 +460,10 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
        struct stub_priv *priv;
        struct usbip_device *ud = &sdev->ud;
        struct usb_device *udev = sdev->udev;
-       int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction);
+       int pipe = get_pipe(sdev, pdu);
+
+       if (pipe == -1)
+               return;
 
        priv = stub_priv_alloc(sdev, pdu);
        if (!priv)
@@ -452,7 +482,8 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
        }
 
        /* allocate urb transfer buffer, if needed */
-       if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
+       if (pdu->u.cmd_submit.transfer_buffer_length > 0 &&
+           pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) {
                priv->urb->transfer_buffer =
                        kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
                                GFP_KERNEL);
index b18bce96c212b35384d18ba108cbe98f27803cab..53172b1f6257cf9f8d72dac57212c0ec939a8dc4 100644 (file)
@@ -167,6 +167,13 @@ static int stub_send_ret_submit(struct stub_device *sdev)
                memset(&pdu_header, 0, sizeof(pdu_header));
                memset(&msg, 0, sizeof(msg));
 
+               if (urb->actual_length > 0 && !urb->transfer_buffer) {
+                       dev_err(&sdev->udev->dev,
+                               "urb: actual_length %d transfer_buffer null\n",
+                               urb->actual_length);
+                       return -1;
+               }
+
                if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
                        iovnum = 2 + urb->number_of_packets;
                else
index e5de35c8c5056b7ab9c38c715cbf57b67a263a92..473fb8a872893caa3494fe9a4a85b60600bdfd28 100644 (file)
@@ -256,6 +256,7 @@ struct usbip_device {
        /* lock for status */
        spinlock_t lock;
 
+       int sockfd;
        struct socket *tcp_socket;
 
        struct task_struct *tcp_rx;
index e78f7472cac496d4cfa2a5fcf0e4111568c242ab..091f76b7196d21da57d4adf1cca91fae29377fdd 100644 (file)
 
 /*
  * output example:
- * hub port sta spd dev      socket           local_busid
- * hs  0000 004 000 00000000         c5a7bb80 1-2.3
+ * hub port sta spd dev       sockfd    local_busid
+ * hs  0000 004 000 00000000  3         1-2.3
  * ................................................
- * ss  0008 004 000 00000000         d8cee980 2-3.4
+ * ss  0008 004 000 00000000  4         2-3.4
  * ................................................
  *
- * IP address can be retrieved from a socket pointer address by looking
- * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
- * port number and its peer IP address.
+ * Output includes the socket fd instead of the socket pointer address to
+ * avoid leaking a kernel memory address in
+ * /sys/devices/platform/vhci_hcd.0/status and in debug output.
+ * The socket pointer address is not used at the moment; it was only made
+ * visible as a convenient way to find the IP address for a socket by
+ * looking it up in /proc/net/{tcp,tcp6}. As this opens a security hole,
+ * the output now uses the sockfd instead.
+ *
  */
 static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vdev)
 {
@@ -39,8 +44,8 @@ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vd
        if (vdev->ud.status == VDEV_ST_USED) {
                *out += sprintf(*out, "%03u %08x ",
                                      vdev->speed, vdev->devid);
-               *out += sprintf(*out, "%16p %s",
-                                     vdev->ud.tcp_socket,
+               *out += sprintf(*out, "%u %s",
+                                     vdev->ud.sockfd,
                                      dev_name(&vdev->udev->dev));
 
        } else {
@@ -160,7 +165,8 @@ static ssize_t nports_show(struct device *dev, struct device_attribute *attr,
        char *s = out;
 
        /*
-        * Half the ports are for SPEED_HIGH and half for SPEED_SUPER, thus the * 2.
+        * Half the ports are for SPEED_HIGH and half for SPEED_SUPER,
+        * thus the * 2.
         */
        out += sprintf(out, "%d\n", VHCI_PORTS * vhci_num_controllers);
        return out - s;
@@ -366,6 +372,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 
        vdev->devid         = devid;
        vdev->speed         = speed;
+       vdev->ud.sockfd     = sockfd;
        vdev->ud.tcp_socket = socket;
        vdev->ud.status     = VDEV_ST_NOTASSIGNED;
 
index a9192fe4f345e627e3f45010b4ecce434f1b04de..c92131edfabaad76355036f2afe13adbadf0b63d 100644 (file)
@@ -522,10 +522,8 @@ static int virtio_mmio_probe(struct platform_device *pdev)
                return -EBUSY;
 
        vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
-       if (!vm_dev) {
-               rc = -ENOMEM;
-               goto free_mem;
-       }
+       if (!vm_dev)
+               return -ENOMEM;
 
        vm_dev->vdev.dev.parent = &pdev->dev;
        vm_dev->vdev.dev.release = virtio_mmio_release_dev;
@@ -535,17 +533,14 @@ static int virtio_mmio_probe(struct platform_device *pdev)
        spin_lock_init(&vm_dev->lock);
 
        vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
-       if (vm_dev->base == NULL) {
-               rc = -EFAULT;
-               goto free_vmdev;
-       }
+       if (vm_dev->base == NULL)
+               return -EFAULT;
 
        /* Check magic value */
        magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
        if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
                dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
-               rc = -ENODEV;
-               goto unmap;
+               return -ENODEV;
        }
 
        /* Check device version */
@@ -553,8 +548,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
        if (vm_dev->version < 1 || vm_dev->version > 2) {
                dev_err(&pdev->dev, "Version %ld not supported!\n",
                                vm_dev->version);
-               rc = -ENXIO;
-               goto unmap;
+               return -ENXIO;
        }
 
        vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID);
@@ -563,8 +557,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
                 * virtio-mmio device with an ID 0 is a (dummy) placeholder
                 * with no function. End probing now with no error reported.
                 */
-               rc = -ENODEV;
-               goto unmap;
+               return -ENODEV;
        }
        vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
 
@@ -590,33 +583,15 @@ static int virtio_mmio_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, vm_dev);
 
        rc = register_virtio_device(&vm_dev->vdev);
-       if (rc) {
-               iounmap(vm_dev->base);
-               devm_release_mem_region(&pdev->dev, mem->start,
-                                       resource_size(mem));
+       if (rc)
                put_device(&vm_dev->vdev.dev);
-       }
-       return rc;
-unmap:
-       iounmap(vm_dev->base);
-free_mem:
-       devm_release_mem_region(&pdev->dev, mem->start,
-                       resource_size(mem));
-free_vmdev:
-       devm_kfree(&pdev->dev, vm_dev);
+
        return rc;
 }
 
 static int virtio_mmio_remove(struct platform_device *pdev)
 {
        struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
-       struct resource *mem;
-
-       iounmap(vm_dev->base);
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (mem)
-               devm_release_mem_region(&pdev->dev, mem->start,
-                       resource_size(mem));
        unregister_virtio_device(&vm_dev->vdev);
 
        return 0;
index d8dd54678ab7100d32435fd68368531d98317375..e5d0c28372ea178a3177114694c7327f34089d24 100644 (file)
@@ -269,7 +269,7 @@ config XEN_ACPI_HOTPLUG_CPU
 
 config XEN_ACPI_PROCESSOR
        tristate "Xen ACPI processor"
-       depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
+       depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
        default m
        help
           This ACPI processor uploads Power Management information to the Xen
index f77e499afdddb02c2d7595459a70b15f3a8c56d5..065f0b607373402a0d4dd7520b9820503390fb0a 100644 (file)
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
        kfree(resource);
 }
 
+/*
+ * Host memory not allocated to dom0. We can use this range for hotplug-based
+ * ballooning.
+ *
+ * It's a type-less resource. Setting IORESOURCE_MEM will make resource
+ * management algorithms (arch_remove_reservations()) look into guest e820,
+ * which we don't want.
+ */
+static struct resource hostmem_resource = {
+       .name   = "Host RAM",
+};
+
+void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
+{}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-       struct resource *res;
-       int ret;
+       struct resource *res, *res_hostmem;
+       int ret = -ENOMEM;
 
        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
        res->name = "System RAM";
        res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-       ret = allocate_resource(&iomem_resource, res,
-                               size, 0, -1,
-                               PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-       if (ret < 0) {
-               pr_err("Cannot allocate new System RAM resource\n");
-               kfree(res);
-               return NULL;
+       res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
+       if (res_hostmem) {
+               /* Try to grab a range from hostmem */
+               res_hostmem->name = "Host memory";
+               ret = allocate_resource(&hostmem_resource, res_hostmem,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+       }
+
+       if (!ret) {
+               /*
+                * Insert this resource into iomem. Because hostmem_resource
+                * tracks the portion of the guest e820 marked as UNUSABLE,
+                * no one else should try to use it.
+                */
+               res->start = res_hostmem->start;
+               res->end = res_hostmem->end;
+               ret = insert_resource(&iomem_resource, res);
+               if (ret < 0) {
+                       pr_err("Can't insert iomem_resource [%llx - %llx]\n",
+                               res->start, res->end);
+                       release_memory_resource(res_hostmem);
+                       res_hostmem = NULL;
+                       res->start = res->end = 0;
+               }
+       }
+
+       if (ret) {
+               ret = allocate_resource(&iomem_resource, res,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+               if (ret < 0) {
+                       pr_err("Cannot allocate new System RAM resource\n");
+                       kfree(res);
+                       return NULL;
+               }
        }
 
 #ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
                        pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
                               pfn, limit);
                        release_memory_resource(res);
+                       release_memory_resource(res_hostmem);
                        return NULL;
                }
        }
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
        set_online_page_callback(&xen_online_page);
        register_memory_notifier(&xen_memory_nb);
        register_sysctl_table(xen_root);
+
+       arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
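
arch_xen_balloon_init() is deliberately a weak no-op so that an architecture which knows about unusable host memory can populate hostmem_resource. A minimal, entirely hypothetical override (the address range is made up) might look like:

    /* Hypothetical arch override; sketch only, the range is invented. */
    void __init arch_xen_balloon_init(struct resource *hostmem_res)
    {
            static struct resource unusable = {
                    .name  = "Unusable host RAM",
                    .start = 0x100000000ULL,
                    .end   = 0x17fffffffULL,
            };

            /*
             * Hang the range off the balloon's parent resource so that
             * additional_memory_resource() can carve hotplug ranges from it.
             */
            if (insert_resource(hostmem_res, &unusable))
                    pr_warn("xen: could not register host memory range\n");
    }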
index 8fc41705c7cd50af4c53f71851d8a4136673411b..961a12dc6dc81f369a71c36e62fbd604886ebf2c 100644 (file)
@@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
        mutex_unlock(&sbi->wq_mutex);
 
-       if (autofs4_write(sbi, pipe, &pkt, pktsz))
        switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
        case 0:
                break;
index ab69dcb70e8ae342733f589338c02dc226f95356..1b468250e94752e6eedf63283cba95a236fb0380 100644 (file)
@@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc,
        return request_close_session(mdsc, session);
 }
 
+static bool drop_negative_children(struct dentry *dentry)
+{
+       struct dentry *child;
+       bool all_negative = true;
+
+       if (!d_is_dir(dentry))
+               goto out;
+
+       spin_lock(&dentry->d_lock);
+       list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+               if (d_really_is_positive(child)) {
+                       all_negative = false;
+                       break;
+               }
+       }
+       spin_unlock(&dentry->d_lock);
+
+       if (all_negative)
+               shrink_dcache_parent(dentry);
+out:
+       return all_negative;
+}
+
 /*
  * Trim old(er) caps.
  *
@@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        if ((used | wanted) & ~oissued & mine)
                goto out;   /* we need these caps */
 
-       session->s_trim_caps--;
        if (oissued) {
                /* we aren't the only cap.. just remove us */
                __ceph_remove_cap(cap, true);
+               session->s_trim_caps--;
        } else {
+               struct dentry *dentry;
                /* try dropping referring dentries */
                spin_unlock(&ci->i_ceph_lock);
-               d_prune_aliases(inode);
-               dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
-                    inode, cap, atomic_read(&inode->i_count));
+               dentry = d_find_any_alias(inode);
+               if (dentry && drop_negative_children(dentry)) {
+                       int count;
+                       dput(dentry);
+                       d_prune_aliases(inode);
+                       count = atomic_read(&inode->i_count);
+                       if (count == 1)
+                               session->s_trim_caps--;
+                       dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+                            inode, cap, count);
+               } else {
+                       dput(dentry);
+               }
                return 0;
        }
 
index e06740436b92080b915bd6c414fec38b0e868cd8..ed88ab8a477434b78aa94ace68c92794a6c4e082 100644 (file)
@@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
        } while (rc == -EAGAIN);
 
        if (rc) {
-               cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
+               if (rc != -ENOENT)
+                       cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
                goto out;
        }
 
index 5331631386a23bd4a7458ecb5fb96efe1773cf71..01346b8b6edb38498c1b48c37e1c9210f4d5fe09 100644 (file)
@@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
        cifs_small_buf_release(req);
 
        rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
-       shdr = get_sync_hdr(rsp);
 
-       if (shdr->Status == STATUS_END_OF_FILE) {
+       if (rc) {
+               if (rc != -ENODATA) {
+                       cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
+                       cifs_dbg(VFS, "Send error in read = %d\n", rc);
+               }
                free_rsp_buf(resp_buftype, rsp_iov.iov_base);
-               return 0;
+               return rc == -ENODATA ? 0 : rc;
        }
 
-       if (rc) {
-               cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
-               cifs_dbg(VFS, "Send error in read = %d\n", rc);
-       } else {
-               *nbytes = le32_to_cpu(rsp->DataLength);
-               if ((*nbytes > CIFS_MAX_MSGSIZE) ||
-                   (*nbytes > io_parms->length)) {
-                       cifs_dbg(FYI, "bad length %d for count %d\n",
-                                *nbytes, io_parms->length);
-                       rc = -EIO;
-                       *nbytes = 0;
-               }
+       *nbytes = le32_to_cpu(rsp->DataLength);
+       if ((*nbytes > CIFS_MAX_MSGSIZE) ||
+           (*nbytes > io_parms->length)) {
+               cifs_dbg(FYI, "bad length %d for count %d\n",
+                        *nbytes, io_parms->length);
+               rc = -EIO;
+               *nbytes = 0;
        }
 
+       shdr = get_sync_hdr(rsp);
+
        if (*buf) {
                memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
                free_rsp_buf(resp_buftype, rsp_iov.iov_base);
index f937082f32449a9f3316e581675c47e51f7f6d30..58e2fe40b2a04423de26729613bae16c233c8920 100644 (file)
@@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV
 config CRAMFS_MTD
        bool "Support CramFs image directly mapped in physical memory"
        depends on CRAMFS && MTD
+       depends on CRAMFS=m || MTD=y
        default y if !CRAMFS_BLOCKDEV
        help
          This option allows the CramFs driver to load data directly from
index 78b72c48374e5eed09587292f3b7eee62059e18b..95981591977a04d08f300c0795fcd96a4211adc1 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 
                        if (pfn != pmd_pfn(*pmdp))
                                goto unlock_pmd;
-                       if (!pmd_dirty(*pmdp)
-                                       && !pmd_access_permitted(*pmdp, WRITE))
+                       if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
                                goto unlock_pmd;
 
                        flush_cache_page(vma, address, pfn);
index 6be2aa0ab26fe26cb37032b99bba656f8d7c6b51..5688b5e1b9378107597a6117c8c3732889f951d2 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,15 +1216,14 @@ killed:
        return -EAGAIN;
 }
 
-char *get_task_comm(char *buf, struct task_struct *tsk)
+char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
 {
-       /* buf must be at least sizeof(tsk->comm) in size */
        task_lock(tsk);
-       strncpy(buf, tsk->comm, sizeof(tsk->comm));
+       strncpy(buf, tsk->comm, buf_size);
        task_unlock(tsk);
        return buf;
 }
-EXPORT_SYMBOL_GPL(get_task_comm);
+EXPORT_SYMBOL_GPL(__get_task_comm);
 
 /*
  * These functions flushes out all traces of the currently running executable
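
The matching header change is not part of this hunk; presumably get_task_comm() becomes a wrapper that captures the caller's real buffer size, along the lines of this sketch (macro body assumed):

    /* Assumed wrapper in <linux/sched.h>; sketch only. */
    #define get_task_comm(buf, tsk) ({                              \
            BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);             \
            __get_task_comm(buf, sizeof(buf), tsk);                 \
    })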
@@ -1340,15 +1339,10 @@ void setup_new_exec(struct linux_binprm * bprm)
                 * avoid bad behavior from the prior rlimits. This has to
                 * happen before arch_pick_mmap_layout(), which examines
                 * RLIMIT_STACK, but after the point of no return to avoid
-                * races from other threads changing the limits. This also
-                * must be protected from races with prlimit() calls.
+                * needing to clean up the change on failure.
                 */
-               task_lock(current->group_leader);
                if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
                        current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
-               if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
-                       current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
-               task_unlock(current->group_leader);
        }
 
        arch_pick_mmap_layout(current->mm);
index 07bca11749d406fd4e3130e31026f1db9ac0d879..c941251ac0c008587b1aaee10fb52e27a50e684a 100644 (file)
@@ -4722,6 +4722,7 @@ retry:
                                                    EXT4_INODE_EOFBLOCKS);
                }
                ext4_mark_inode_dirty(handle, inode);
+               ext4_update_inode_fsync_trans(handle, inode, 1);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
index b4267d72f24955c314d78f350c4247e6c7373cc2..b32cf263750d1d3b2024847e78bf1c6181e8a44f 100644 (file)
@@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
                struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
 
+               if (IS_ERR(p))
+                       return ERR_CAST(p);
                if (p) {
                        int acl_size = p->a_count * sizeof(ext4_acl_entry);
 
index 7df2c5644e59c9678e9379985338f972a69d42d1..534a9130f62578931a24477f317c17b42c71ffc3 100644 (file)
@@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
  */
 int ext4_inode_is_fast_symlink(struct inode *inode)
 {
+       if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+               int ea_blocks = EXT4_I(inode)->i_file_acl ?
+                               EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
+
+               if (ext4_has_inline_data(inode))
+                       return 0;
+
+               return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+       }
        return S_ISLNK(inode->i_mode) && inode->i_size &&
               (inode->i_size < EXT4_N_BLOCKS * 4);
 }
index 798b3ac680db1b4f8c4510a0bd66d0510d11216a..e750d68fbcb50c0447e13556905da8401f5f6b03 100644 (file)
@@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
                               "falling back\n"));
        }
        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
+       if (!nblocks) {
+               ret = NULL;
+               goto cleanup_and_exit;
+       }
        start = EXT4_I(dir)->i_dir_start_lookup;
        if (start >= nblocks)
                start = 0;
index e158ec6b527b2d72341e096f76e628b5e61ea4cf..9d1374ab6e06f2cd7b57aedf196c53a745a1a683 100644 (file)
@@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
                            SB_DIRSYNC |
                            SB_SILENT |
                            SB_POSIXACL |
+                           SB_LAZYTIME |
                            SB_I_VERSION);
 
        if (flags & MS_REMOUNT)
index 0ac2fb1c6b634626cf1f98f4423841ad103d08c2..b9129e2befeaa4186138bbd84e78d7ca2d128370 100644 (file)
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
        const struct sockaddr *sap = data->addr;
        struct nfs_net *nn = net_generic(data->net, nfs_net_id);
 
+again:
        list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
                const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
                /* Don't match clients that failed to initialise properly */
                if (clp->cl_cons_state < 0)
                        continue;
 
+               /* If a client is still initializing then we need to wait */
+               if (clp->cl_cons_state > NFS_CS_READY) {
+                       refcount_inc(&clp->cl_count);
+                       spin_unlock(&nn->nfs_client_lock);
+                       nfs_wait_client_init_complete(clp);
+                       nfs_put_client(clp);
+                       spin_lock(&nn->nfs_client_lock);
+                       goto again;
+               }
+
                /* Different NFS versions cannot share the same nfs_client */
                if (clp->rpc_ops != data->nfs_mod->rpc_ops)
                        continue;
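
The rescan above drops nfs_client_lock before sleeping and then restarts the walk, because the list may have changed while the lock was released. The same scan-wait-rescan shape, with invented names purely for illustration, is:

    /* Generic sketch; obj, obj_list, init_done and obj_release are hypothetical. */
    spin_lock(&list_lock);
again:
    list_for_each_entry(obj, &obj_list, link) {
            if (!obj->ready) {
                    kref_get(&obj->ref);
                    spin_unlock(&list_lock);        /* never sleep under a spinlock */
                    wait_for_completion(&obj->init_done);
                    kref_put(&obj->ref, obj_release);
                    spin_lock(&list_lock);
                    goto again;                     /* the list may have changed */
            }
            /* ... normal matching logic ... */
    }
    spin_unlock(&list_lock);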
index 12bbab0becb420463bb37d4eefe0c2a9ec56796d..65a7e5da508c3e3019dab617b3ba92521b96cd0b 100644 (file)
@@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
        if (error < 0)
                goto error;
 
-       if (!nfs4_has_session(clp))
-               nfs_mark_client_ready(clp, NFS_CS_READY);
-
        error = nfs4_discover_server_trunking(clp, &old);
        if (error < 0)
                goto error;
 
-       if (clp != old)
+       if (clp != old) {
                clp->cl_preserve_clid = true;
+               /*
+                * Mark the client as having failed initialization so other
+                * processes walking the nfs_client_list in nfs_match_client()
+                * won't try to use it.
+                */
+               nfs_mark_client_ready(clp, -EPERM);
+       }
        nfs_put_client(clp);
        clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
        return old;
@@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
        spin_lock(&nn->nfs_client_lock);
        list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
 
+               if (pos == new)
+                       goto found;
+
                status = nfs4_match_client(pos, new, &prev, nn);
                if (status < 0)
                        goto out_unlock;
@@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
                 * way that a SETCLIENTID_CONFIRM to pos can succeed is
                 * if new and pos point to the same server:
                 */
+found:
                refcount_inc(&pos->cl_count);
                spin_unlock(&nn->nfs_client_lock);
 
@@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
                case 0:
                        nfs4_swap_callback_idents(pos, new);
                        pos->cl_confirm = new->cl_confirm;
+                       nfs_mark_client_ready(pos, NFS_CS_READY);
 
                        prev = NULL;
                        *result = pos;
index 5b5f464f6f2ada7cdfd2ea80d95d0c048ccee3f6..4a379d7918f23e1130468c2f58bfea3623035116 100644 (file)
@@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how)
        if (res)
                error = nfs_generic_commit_list(inode, &head, how, &cinfo);
        nfs_commit_end(cinfo.mds);
+       if (res == 0)
+               return res;
        if (error < 0)
                goto out_error;
        if (!may_wait)
index 697f8ae7792d1304e3cec035fd56185eb2ee02df..f650e475d8f0d84af1bb3013b6b35ef77421cde2 100644 (file)
@@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
                                gi->gid[i] = exp->ex_anon_gid;
                        else
                                gi->gid[i] = rqgi->gid[i];
+
+                       /* Each thread allocates its own gi, no race */
+                       groups_sort(gi);
                }
        } else {
                gi = get_group_info(rqgi);
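
groups_sort() has to run before the group_info is used because groups_search() performs a binary search over the gid array. A minimal sketch of building a sorted list with the usual helpers (the wrapper function itself is hypothetical):

    /* Hypothetical helper; sketch only. */
    static struct group_info *build_sorted_groups(const kgid_t *gids, int n)
    {
            struct group_info *gi = groups_alloc(n);
            int i;

            if (!gi)
                    return NULL;
            for (i = 0; i < n; i++)
                    gi->gid[i] = gids[i];
            groups_sort(gi);        /* required before groups_search() sees it */
            return gi;
    }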
index cbfc196e5dc53b2cb2376524c5955af20bd07ea6..5ac4154668613d158d63301272039ee5f5b814e4 100644 (file)
@@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR
          an overlay which has redirects on a kernel that doesn't support this
          feature will have unexpected results.
 
+config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
+       bool "Overlayfs: follow redirects even if redirects are turned off"
+       default y
+       depends on OVERLAY_FS
+       help
+         Disable this to get a possibly more secure configuration, but that
+         might not be backward compatible with previous kernels.
+
+         For more information, see Documentation/filesystems/overlayfs.txt
+
 config OVERLAY_FS_INDEX
        bool "Overlayfs: turn on inodes index feature by default"
        depends on OVERLAY_FS
index e13921824c70ce8061e545f21e408eda478d7644..f9788bc116a8d1b5137f805d9955995e68b31884 100644 (file)
@@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
                spin_unlock(&dentry->d_lock);
        } else {
                kfree(redirect);
-               pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+               pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+                                   err);
                /* Fall back to userspace copy-up */
                err = -EXDEV;
        }
index 625ed8066570607b6140a0c22c39f152135a5c81..beb945e1963c0aac86fbce312ca9814821a1c33d 100644 (file)
@@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
 
        /* Check if index is orphan and don't warn before cleaning it */
        if (d_inode(index)->i_nlink == 1 &&
-           ovl_get_nlink(index, origin.dentry, 0) == 0)
+           ovl_get_nlink(origin.dentry, index, 0) == 0)
                err = -ENOENT;
 
        dput(origin.dentry);
@@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                if (d.stop)
                        break;
 
+               /*
+                * Following redirects can have security consequences: it's like
+                * a symlink into the lower layer without the permission checks.
+                * This is only a problem if the upper layer is untrusted (e.g.
+                * it comes from a USB drive).  This can allow a non-readable
+                * file or directory to become readable.
+                *
+                * Following redirects only when redirects are enabled closes
+                * this attack vector when it is not needed.
+                */
+               err = -EPERM;
+               if (d.redirect && !ofs->config.redirect_follow) {
+                       pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
+                       goto out_put;
+               }
+
                if (d.redirect && d.redirect[0] == '/' && poe != roe) {
                        poe = roe;
 
index 13eab09a6b6f33c04c90ad822aa8a0dbe12a4927..b489099ccd493a54e231c9ee7e3ebe18809398b5 100644 (file)
@@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
 static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 {
        struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
-       int err = IS_ERR(ret) ? PTR_ERR(ret) : 0;
+       int err = PTR_ERR_OR_ZERO(ret);
 
        pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
        return ret;
index 752bab645879e5fce43e86d45e835d94d3c44b22..9d0bc03bf6e4563ef280fe5e8f695cc9f1e8abd5 100644 (file)
@@ -14,6 +14,8 @@ struct ovl_config {
        char *workdir;
        bool default_permissions;
        bool redirect_dir;
+       bool redirect_follow;
+       const char *redirect_mode;
        bool index;
 };
 
index 0daa4354fec4ae967da4ae43e81f7f833cd1095c..8c98578d27a1496922d1b862fa90af61da3dd290 100644 (file)
@@ -499,7 +499,7 @@ out:
        return err;
 
 fail:
-       pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n",
+       pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
                            p->name, err);
        goto out;
 }
@@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
                        return PTR_ERR(rdt.cache);
        }
 
-       return iterate_dir(od->realfile, &rdt.ctx);
+       err = iterate_dir(od->realfile, &rdt.ctx);
+       ctx->pos = rdt.ctx.pos;
+
+       return err;
 }
 
 
index 288d20f9a55a220d3782f4eaf10e62c491c3fc5c..76440feb79f64ee0fce36713ed4fe4264ff70f11 100644 (file)
@@ -33,6 +33,13 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(ovl_redirect_dir_def,
                 "Default to on or off for the redirect_dir feature");
 
+static bool ovl_redirect_always_follow =
+       IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+                  bool, 0644);
+MODULE_PARM_DESC(ovl_redirect_always_follow,
+                "Follow redirects even if redirect_dir feature is turned off");
+
 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
 module_param_named(index, ovl_index_def, bool, 0644);
 MODULE_PARM_DESC(ovl_index_def,
@@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
        kfree(ofs->config.lowerdir);
        kfree(ofs->config.upperdir);
        kfree(ofs->config.workdir);
+       kfree(ofs->config.redirect_mode);
        if (ofs->creator_cred)
                put_cred(ofs->creator_cred);
        kfree(ofs);
@@ -244,6 +252,7 @@ static void ovl_put_super(struct super_block *sb)
        ovl_free_fs(ofs);
 }
 
+/* Sync real dirty inodes in upper filesystem (if it exists) */
 static int ovl_sync_fs(struct super_block *sb, int wait)
 {
        struct ovl_fs *ofs = sb->s_fs_info;
@@ -252,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
 
        if (!ofs->upper_mnt)
                return 0;
-       upper_sb = ofs->upper_mnt->mnt_sb;
-       if (!upper_sb->s_op->sync_fs)
+
+       /*
+        * If this is a sync(2) call or an emergency sync, all the super blocks
+        * will be iterated, including upper_sb, so no need to do anything.
+        *
+        * If this is a syncfs(2) call, then we do need to call
+        * sync_filesystem() on upper_sb, but it is enough to do so when we
+        * are called with wait == 1.
+        */
+       if (!wait)
                return 0;
 
-       /* real inodes have already been synced by sync_filesystem(ovl_sb) */
+       upper_sb = ofs->upper_mnt->mnt_sb;
+
        down_read(&upper_sb->s_umount);
-       ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+       ret = sync_filesystem(upper_sb);
        up_read(&upper_sb->s_umount);
+
        return ret;
 }
 
@@ -295,6 +314,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs)
        return (!ofs->upper_mnt || !ofs->workdir);
 }
 
+static const char *ovl_redirect_mode_def(void)
+{
+       return ovl_redirect_dir_def ? "on" : "off";
+}
+
 /**
  * ovl_show_options
  *
@@ -313,12 +337,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
        }
        if (ofs->config.default_permissions)
                seq_puts(m, ",default_permissions");
-       if (ofs->config.redirect_dir != ovl_redirect_dir_def)
-               seq_printf(m, ",redirect_dir=%s",
-                          ofs->config.redirect_dir ? "on" : "off");
+       if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
+               seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
        if (ofs->config.index != ovl_index_def)
-               seq_printf(m, ",index=%s",
-                          ofs->config.index ? "on" : "off");
+               seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
        return 0;
 }
 
@@ -348,8 +370,7 @@ enum {
        OPT_UPPERDIR,
        OPT_WORKDIR,
        OPT_DEFAULT_PERMISSIONS,
-       OPT_REDIRECT_DIR_ON,
-       OPT_REDIRECT_DIR_OFF,
+       OPT_REDIRECT_DIR,
        OPT_INDEX_ON,
        OPT_INDEX_OFF,
        OPT_ERR,
@@ -360,8 +381,7 @@ static const match_table_t ovl_tokens = {
        {OPT_UPPERDIR,                  "upperdir=%s"},
        {OPT_WORKDIR,                   "workdir=%s"},
        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
-       {OPT_REDIRECT_DIR_ON,           "redirect_dir=on"},
-       {OPT_REDIRECT_DIR_OFF,          "redirect_dir=off"},
+       {OPT_REDIRECT_DIR,              "redirect_dir=%s"},
        {OPT_INDEX_ON,                  "index=on"},
        {OPT_INDEX_OFF,                 "index=off"},
        {OPT_ERR,                       NULL}
@@ -390,10 +410,37 @@ static char *ovl_next_opt(char **s)
        return sbegin;
 }
 
+static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
+{
+       if (strcmp(mode, "on") == 0) {
+               config->redirect_dir = true;
+               /*
+                * Does not make sense to have redirect creation without
+                * redirect following.
+                */
+               config->redirect_follow = true;
+       } else if (strcmp(mode, "follow") == 0) {
+               config->redirect_follow = true;
+       } else if (strcmp(mode, "off") == 0) {
+               if (ovl_redirect_always_follow)
+                       config->redirect_follow = true;
+       } else if (strcmp(mode, "nofollow") != 0) {
+               pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
+                      mode);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int ovl_parse_opt(char *opt, struct ovl_config *config)
 {
        char *p;
 
+       config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
+       if (!config->redirect_mode)
+               return -ENOMEM;
+
        while ((p = ovl_next_opt(&opt)) != NULL) {
                int token;
                substring_t args[MAX_OPT_ARGS];
@@ -428,12 +475,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                        config->default_permissions = true;
                        break;
 
-               case OPT_REDIRECT_DIR_ON:
-                       config->redirect_dir = true;
-                       break;
-
-               case OPT_REDIRECT_DIR_OFF:
-                       config->redirect_dir = false;
+               case OPT_REDIRECT_DIR:
+                       kfree(config->redirect_mode);
+                       config->redirect_mode = match_strdup(&args[0]);
+                       if (!config->redirect_mode)
+                               return -ENOMEM;
                        break;
 
                case OPT_INDEX_ON:
@@ -458,7 +504,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                config->workdir = NULL;
        }
 
-       return 0;
+       return ovl_parse_redirect_mode(config, config->redirect_mode);
 }
 
 #define OVL_WORKDIR_NAME "work"
@@ -1160,7 +1206,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        if (!cred)
                goto out_err;
 
-       ofs->config.redirect_dir = ovl_redirect_dir_def;
        ofs->config.index = ovl_index_def;
        err = ovl_parse_opt((char *) data, &ofs->config);
        if (err)
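
With redirect_dir now parsed as a string, the accepted modes are on, follow, off and nofollow. A hypothetical userspace mount(2) call exercising the new follow mode (paths invented) could be:

    /* Hypothetical userspace example; the paths are made up. */
    #include <sys/mount.h>

    mount("overlay", "/mnt/merged", "overlay", 0,
          "lowerdir=/lower,upperdir=/upper,workdir=/work,redirect_dir=follow");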
index d4e33e8f1e6fee3172e0e07e9d358587eea34bc4..7ff1349609e4874a35268876065490d77f5e01ab 100644 (file)
@@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 
        INIT_LIST_HEAD(&s->s_mounts);
        s->s_user_ns = get_user_ns(user_ns);
+       init_rwsem(&s->s_umount);
+       lockdep_set_class(&s->s_umount, &type->s_umount_key);
+       /*
+        * sget() can have s_umount recursion.
+        *
+        * When it cannot find a suitable sb, it allocates a new
+        * one (this one), and tries again to find a suitable old
+        * one.
+        *
+        * In case that succeeds, it will acquire the s_umount
+        * lock of the old one. Since these are clearly distinct
+        * locks, and this object isn't exposed yet, there's no
+        * risk of deadlocks.
+        *
+        * Annotate this by putting this lock in a different
+        * subclass.
+        */
+       down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
 
        if (security_sb_alloc(s))
                goto fail;
@@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
                goto fail;
        if (list_lru_init_memcg(&s->s_inode_lru))
                goto fail;
-
-       init_rwsem(&s->s_umount);
-       lockdep_set_class(&s->s_umount, &type->s_umount_key);
-       /*
-        * sget() can have s_umount recursion.
-        *
-        * When it cannot find a suitable sb, it allocates a new
-        * one (this one), and tries again to find a suitable old
-        * one.
-        *
-        * In case that succeeds, it will acquire the s_umount
-        * lock of the old one. Since these are clearly distrinct
-        * locks, and this object isn't exposed yet, there's no
-        * risk of deadlocks.
-        *
-        * Annotate this by putting this lock in a different
-        * subclass.
-        */
-       down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
        s->s_count = 1;
        atomic_set(&s->s_active, 1);
        mutex_init(&s->s_vfs_rename_mutex);
index 0da80019a9173cba18811abc71e39ca4631a7449..83ed7715f856d2025509c308583436e63d0043a5 100644 (file)
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
        ASSERT(args->agbno % args->alignment == 0);
 
        /* if not file data, insert new block into the reverse map btree */
-       if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
                                       args->agbno, args->len, &args->oinfo);
                if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
        bno_cur = cnt_cur = NULL;
        mp = tp->t_mountp;
 
-       if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(oinfo)) {
                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
                if (error)
                        goto error0;
index 6249c92671debe20a45e3cb0577360552e36d523..a76914db72ef11094cd8d74e5bd6cd6add2a3fde 100644 (file)
@@ -212,6 +212,7 @@ xfs_attr_set(
        int                     flags)
 {
        struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *leaf_bp = NULL;
        struct xfs_da_args      args;
        struct xfs_defer_ops    dfops;
        struct xfs_trans_res    tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
                 * GROT: another possible req'mt for a double-split btree op.
                 */
                xfs_defer_init(args.dfops, args.firstblock);
-               error = xfs_attr_shortform_to_leaf(&args);
+               error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
                if (error)
                        goto out_defer_cancel;
+               /*
+                * Prevent the leaf buffer from being unlocked so that a
+                * concurrent AIL push cannot grab the half-baked leaf
+                * buffer and run into problems with the write verifier.
+                */
+               xfs_trans_bhold(args.trans, leaf_bp);
+               xfs_defer_bjoin(args.dfops, leaf_bp);
                xfs_defer_ijoin(args.dfops, dp);
                error = xfs_defer_finish(&args.trans, args.dfops);
                if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
 
                /*
                 * Commit the leaf transformation.  We'll need another (linked)
-                * transaction to add the new attribute to the leaf.
+                * transaction to add the new attribute to the leaf, which
+                * means that we have to hold & join the leaf buffer here too.
                 */
-
                error = xfs_trans_roll_inode(&args.trans, dp);
                if (error)
                        goto out;
-
+               xfs_trans_bjoin(args.trans, leaf_bp);
+               leaf_bp = NULL;
        }
 
        if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
 
 out_defer_cancel:
        xfs_defer_cancel(&dfops);
-       args.trans = NULL;
 out:
+       if (leaf_bp)
+               xfs_trans_brelse(args.trans, leaf_bp);
        if (args.trans)
                xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
index 53cc8b986eac45c4e5ec4afd319891b6e9f716c5..601eaa36f1ada22e2213f9178bcb5cdb5868034d 100644 (file)
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+       struct xfs_da_args      *args,
+       struct xfs_buf          **leaf_bp)
 {
        xfs_inode_t *dp;
        xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
                sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
        }
        error = 0;
-
+       *leaf_bp = bp;
 out:
        kmem_free(tmpbuffer);
        return error;
index f7dda0c237b044b166d6d178fca3178feff2b644..894124efb421e0d0674b0f39bf63dabfe7916937 100644 (file)
@@ -48,7 +48,8 @@ void  xfs_attr_shortform_create(struct xfs_da_args *args);
 void   xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int    xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int    xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+                       struct xfs_buf **leaf_bp);
 int    xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
index 1210f684d3c28f9af8d8403c1f0222ef06dc380b..1bddbba6b80c960bdcc10c9a30210c119e1b2f77 100644 (file)
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
         * blowing out the transaction with a mix of EFIs and reflink
         * adjustments.
         */
-       if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+       if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
        else
                max_len = len;
index 072ebfe1d6aeb3e00e306a06d71a1b478382f3ad..087fea02c3892c34e1e63df2839bd8d804ba1f3f 100644 (file)
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+       /* Hold the (previously bjoin'd) buffer locked across the roll. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+               xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
        trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
        /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
 
+       /* Rejoin the buffers and dirty them so the log moves forward. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+               xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+               xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+       }
+
        return error;
 }
 
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
                }
        }
 
+       ASSERT(0);
+       return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+       struct xfs_defer_ops            *dop,
+       struct xfs_buf                  *bp)
+{
+       int                             i;
+
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+               if (dop->dop_bufs[i] == bp)
+                       return 0;
+               else if (dop->dop_bufs[i] == NULL) {
+                       dop->dop_bufs[i] = bp;
+                       return 0;
+               }
+       }
+
+       ASSERT(0);
        return -EFSCORRUPTED;
 }
 
@@ -493,9 +528,7 @@ xfs_defer_init(
        struct xfs_defer_ops            *dop,
        xfs_fsblock_t                   *fbp)
 {
-       dop->dop_committed = false;
-       dop->dop_low = false;
-       memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+       memset(dop, 0, sizeof(struct xfs_defer_ops));
        *fbp = NULLFSBLOCK;
        INIT_LIST_HEAD(&dop->dop_intake);
        INIT_LIST_HEAD(&dop->dop_pending);
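
Together with the xfs_attr_set() hunk earlier, the intended calling pattern for a buffer that must stay locked across deferred-op rolls is roughly (error handling elided):

    /* Sketch of the calling pattern shown in the hunks above. */
    xfs_trans_bhold(tp, bp);        /* keep the buffer locked at commit      */
    xfs_defer_bjoin(dfops, bp);     /* relog and rejoin it on every roll     */
    xfs_defer_ijoin(dfops, ip);
    error = xfs_defer_finish(&tp, dfops);
    /* ... */
    xfs_trans_bjoin(tp, bp);        /* hand the buffer to the final trans    */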
index d4f046dd44bd4ae434d8104991d0327f2d2b9fc7..045beacdd37d81c9e01e0ab46ab2bbcbbb581fbb 100644 (file)
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
 };
 
 #define XFS_DEFER_OPS_NR_INODES        2       /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS  2       /* join up to two buffers */
 
 struct xfs_defer_ops {
        bool                    dop_committed;  /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
        struct list_head        dop_intake;     /* unlogged pending work */
        struct list_head        dop_pending;    /* logged pending work */
 
-       /* relog these inodes with each roll */
+       /* relog these with each roll */
        struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+       struct xfs_buf          *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
index de3f04a986565d7265fc694b80962000de8d7dc8..3b57ef0f2f76c758e6a9c8b89b7a0c470cdd09a9 100644 (file)
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
 xfs_ialloc_ag_select(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_ino_t       parent,         /* parent directory inode number */
-       umode_t         mode,           /* bits set to indicate file type */
-       int             okalloc)        /* ok to allocate more space */
+       umode_t         mode)           /* bits set to indicate file type */
 {
        xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
        xfs_agnumber_t  agno;           /* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
                        return agno;
                }
 
-               if (!okalloc)
-                       goto nextag;
-
                if (!pag->pagf_init) {
                        error = xfs_alloc_pagf_init(mp, tp, agno, flags);
                        if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
        struct xfs_trans        *tp,
        xfs_ino_t               parent,
        umode_t                 mode,
-       int                     okalloc,
        struct xfs_buf          **IO_agbp,
        xfs_ino_t               *inop)
 {
@@ -1692,6 +1687,7 @@ xfs_dialloc(
        int                     noroom = 0;
        xfs_agnumber_t          start_agno;
        struct xfs_perag        *pag;
+       int                     okalloc = 1;
 
        if (*IO_agbp) {
                /*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
         * We do not have an agbp, so select an initial allocation
         * group for inode allocation.
         */
-       start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+       start_agno = xfs_ialloc_ag_select(tp, parent, mode);
        if (start_agno == NULLAGNUMBER) {
                *inop = NULLFSINO;
                return 0;
index d2bdcd5e7312e499deb91b29bc62cb91cc35e881..66a8de0b1caaad8d1ba9d5ed94fccc813780dca3 100644 (file)
@@ -81,7 +81,6 @@ xfs_dialloc(
        struct xfs_trans *tp,           /* transaction pointer */
        xfs_ino_t       parent,         /* parent inode (directory) */
        umode_t         mode,           /* mode bits for new inode */
-       int             okalloc,        /* ok to allocate more space */
        struct xfs_buf  **agbp,         /* buf for a.g. inode header */
        xfs_ino_t       *inop);         /* inode number allocated */
 
index 89bf16b4d9377293fa842c48f2b9b637f83c62c2..b0f31791c7e6137c0b7e46c35000e3c76e145ba1 100644 (file)
@@ -632,8 +632,6 @@ xfs_iext_insert(
        struct xfs_iext_leaf    *new = NULL;
        int                     nr_entries, i;
 
-       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
        if (ifp->if_height == 0)
                xfs_iext_alloc_root(ifp, cur);
        else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
        xfs_iext_set(cur_rec(cur), irec);
        ifp->if_bytes += sizeof(struct xfs_iext_rec);
 
+       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
        if (new)
                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 }
index 585b35d34142157863740d3b8cd437b37e49e05d..c40d26763075307b064d49bd3cb48dfce8dd5b67 100644 (file)
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Add refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-       if (error)
-               return error;
-
-       /* Add rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Remove refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-       if (error)
-               return error;
-
-       /* Remove rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
-       return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+       error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
                        fsb, len);
+       if (error)
+               return error;
+
+       /* Add rmap entry */
+       return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
+       /* Remove rmap entry */
+       error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+       if (error)
+               return error;
+
        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
                        fsb, len);
 }
index dd019cee1b3bdccf4e08b25fd70b2c59220859a1..50db920ceeebbf077c2b3b13690066173ba7cf4e 100644 (file)
@@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range(
        return error;
 }
 
+/*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+       struct xfs_mount        *mp,
+       uint64_t                ltoff,
+       struct xfs_rmap_irec    *rec,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags)
+{
+       int                     error = 0;
+
+       if (owner == XFS_RMAP_OWN_UNKNOWN)
+               return 0;
+
+       /* Make sure the unwritten flag matches. */
+       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+                       (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+       /* Make sure the owner matches what we expect to find in the tree. */
+       XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+       /* Check the offset, if necessary. */
+       if (XFS_RMAP_NON_INODE_OWNER(owner))
+               goto out;
+
+       if (flags & XFS_RMAP_BMBT_BLOCK) {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+                               out);
+       } else {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                               ltoff + rec->rm_blockcount >= offset + len,
+                               out);
+       }
+
+out:
+       return error;
+}
+
 /*
  * Find the extent in the rmap btree and remove it.
  *
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
                goto out_done;
        }
 
-       /* Make sure the unwritten flag matches. */
-       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-                       (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+       /*
+        * If we're doing an unknown-owner removal for EFI recovery, we expect
+        * to find the full range in the rmapbt or nothing at all.  If we
+        * don't find any rmaps overlapping either end of the range, we're
+        * done.  Hopefully this means that the EFI creator already queued
+        * (and finished) a RUI to remove the rmap.
+        */
+       if (owner == XFS_RMAP_OWN_UNKNOWN &&
+           ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+               struct xfs_rmap_irec    rtrec;
+
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto out_error;
+               if (i == 0)
+                       goto out_done;
+               error = xfs_rmap_get_rec(cur, &rtrec, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               if (rtrec.rm_startblock >= bno + len)
+                       goto out_done;
+       }
 
        /* Make sure the extent we found covers the entire freeing range. */
        XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-               ltrec.rm_startblock + ltrec.rm_blockcount >=
-               bno + len, out_error);
+                       ltrec.rm_startblock + ltrec.rm_blockcount >=
+                       bno + len, out_error);
 
-       /* Make sure the owner matches what we expect to find in the tree. */
-       XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-                                   XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-       /* Check the offset, if necessary. */
-       if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-               if (flags & XFS_RMAP_BMBT_BLOCK) {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-                                       out_error);
-               } else {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_offset <= offset, out_error);
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltoff + ltrec.rm_blockcount >= offset + len,
-                                       out_error);
-               }
-       }
+       /* Check owner information. */
+       error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+                       offset, flags);
+       if (error)
+               goto out_error;
 
        if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
                /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
                flags |= XFS_RMAP_UNWRITTEN;
        trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
                        unwritten, oinfo);
+       ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
        /*
         * For the initial lookup, look for an exact match or the left-adjacent
index 466ede637080e5832046a96d62789eb2e46ed03f..0fcd5b1ba7295379081e0c61d230324447a8ae56 100644 (file)
@@ -61,7 +61,21 @@ static inline void
 xfs_rmap_skip_owner_update(
        struct xfs_owner_info   *oi)
 {
-       oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
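
The split makes intent explicit at the call sites; a usage sketch based on the hunks above (not a complete call chain):

    /* Sketch only, derived from the call sites in this series. */
    struct xfs_owner_info   oinfo;

    /* Caller already queued its own rmap updates: skip them when freeing. */
    xfs_rmap_skip_owner_update(&oinfo);     /* owner = XFS_RMAP_OWN_NULL    */

    /* EFI recovery: the owner is genuinely unknown, match any rmap record. */
    xfs_rmap_any_owner_update(&oinfo);      /* owner = XFS_RMAP_OWN_UNKNOWN */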
index 9c42c4efd01ec57bb1945ba671780cc2687e49c3..ab3aef2ae8233350f42647b77c0dff833521fe2f 100644 (file)
@@ -46,7 +46,6 @@
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
-#include "scrub/scrub.h"
 #include "scrub/btree.h"
 
 /*
index 472080e757887957b5cd7baf9f12a10d6b27ae71..86daed0e3a458dd16ab76251b2772400be1e9409 100644 (file)
@@ -26,7 +26,6 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_trans.h"
index 44f8c54512102577dbe768e5f137298ba1ee79c7..64da90655e957c3fd01331720aa32093909ddad6 100644 (file)
@@ -538,7 +538,7 @@ xfs_efi_recover(
                return error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-       xfs_rmap_skip_owner_update(&oinfo);
+       xfs_rmap_any_owner_update(&oinfo);
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &efip->efi_format.efi_extents[i];
                error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
index 8f22fc579dbba4abf9b609040802d24e9cf2f732..60a2e128cb6a59aa7181faa5c519d511a308bc5c 100644 (file)
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
                 * this doesn't actually exist in the rmap btree.
                 */
                xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+               error = xfs_rmap_free(tp, bp, agno,
+                               be32_to_cpu(agf->agf_length) - new,
+                               new, &oinfo);
+               if (error)
+                       goto error0;
                error = xfs_free_extent(tp,
                                XFS_AGB_TO_FSB(mp, agno,
                                        be32_to_cpu(agf->agf_length) - new),
index 43005fbe8b1eefabc84ee762a9427ec784889814..3861d61fb265f66a9d39723286d5adc9772cc15e 100644 (file)
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
  * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
  * (We'll just piggyback on the post-EOF prealloc space workqueue.)
  */
-STATIC void
+void
 xfs_queue_cowblocks(
        struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
        return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+       int             tag)
+{
+       switch (tag) {
+       case XFS_ICI_EOFBLOCKS_TAG:
+               return XFS_IEOFBLOCKS;
+       case XFS_ICI_COWBLOCKS_TAG:
+               return XFS_ICOWBLOCKS;
+       default:
+               ASSERT(0);
+               return 0;
+       }
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
        xfs_inode_t     *ip,
        void            (*execute)(struct xfs_mount *mp),
        void            (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
         * Don't bother locking the AG and looking up in the radix trees
         * if we already know that we have the tag set.
         */
-       if (ip->i_flags & XFS_IEOFBLOCKS)
+       if (ip->i_flags & xfs_iflag_for_tag(tag))
                return;
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags |= XFS_IEOFBLOCKS;
+       ip->i_flags |= xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_eofblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
                        trace_xfs_perag_set_eofblocks,
                        XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
        xfs_inode_t     *ip,
        void            (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
                                    int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
        struct xfs_perag *pag;
 
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags &= ~XFS_IEOFBLOCKS;
+       ip->i_flags &= ~xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_eofblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
 
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_cowblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
                        trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_cowblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
index bff4d85e54984ad84ca0741f801f536a99195055..d4a77588eca15b90639debc1ca6c5c62a3680de8 100644 (file)
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, int flags, void *args),
index 8012741266488ab4e0724b68aadb2742c1d29c2c..6f95bdb408ced01b9471b931714d279003a22d92 100644 (file)
@@ -749,7 +749,6 @@ xfs_ialloc(
        xfs_nlink_t     nlink,
        dev_t           rdev,
        prid_t          prid,
-       int             okalloc,
        xfs_buf_t       **ialloc_context,
        xfs_inode_t     **ipp)
 {
@@ -765,7 +764,7 @@ xfs_ialloc(
         * Call the space management code to pick
         * the on-disk inode to be allocated.
         */
-       error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+       error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
                            ialloc_context, &ino);
        if (error)
                return error;
@@ -957,7 +956,6 @@ xfs_dir_ialloc(
        xfs_nlink_t     nlink,
        dev_t           rdev,
        prid_t          prid,           /* project id */
-       int             okalloc,        /* ok to allocate new space */
        xfs_inode_t     **ipp,          /* pointer to inode; it will be
                                           locked. */
        int             *committed)
@@ -988,8 +986,8 @@ xfs_dir_ialloc(
         * transaction commit so that no other process can steal
         * the inode(s) that we've just allocated.
         */
-       code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
-                         &ialloc_context, &ip);
+       code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
+                       &ip);
 
        /*
         * Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
                 * this call should always succeed.
                 */
                code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-                                 okalloc, &ialloc_context, &ip);
+                                 &ialloc_context, &ip);
 
                /*
                 * If we get an error at this point, return to the caller
@@ -1182,11 +1180,6 @@ xfs_create(
                xfs_flush_inodes(mp);
                error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
        }
-       if (error == -ENOSPC) {
-               /* No space at all so try a "no-allocation" reservation */
-               resblks = 0;
-               error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-       }
        if (error)
                goto out_release_inode;
 
@@ -1203,19 +1196,13 @@ xfs_create(
        if (error)
                goto out_trans_cancel;
 
-       if (!resblks) {
-               error = xfs_dir_canenter(tp, dp, name);
-               if (error)
-                       goto out_trans_cancel;
-       }
-
        /*
         * A newly created regular or special file just has one directory
         * entry pointing to it, but a directory also has the "." entry
         * pointing to itself.
         */
-       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
-                              prid, resblks > 0, &ip, NULL);
+       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
+                       NULL);
        if (error)
                goto out_trans_cancel;
 
@@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
        tres = &M_RES(mp)->tr_create_tmpfile;
 
        error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
-       if (error == -ENOSPC) {
-               /* No space at all so try a "no-allocation" reservation */
-               resblks = 0;
-               error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-       }
        if (error)
                goto out_release_inode;
 
@@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
        if (error)
                goto out_trans_cancel;
 
-       error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
-                               prid, resblks > 0, &ip, NULL);
+       error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
        if (error)
                goto out_trans_cancel;
 
@@ -1506,6 +1487,24 @@ xfs_link(
        return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+       struct xfs_inode        *ip)
+{
+       struct xfs_ifork        *dfork;
+       struct xfs_ifork        *cfork;
+
+       if (!xfs_is_reflink_inode(ip))
+               return;
+       dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+       if (cfork->if_bytes == 0)
+               xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
  * Free up the underlying blocks past new_size.  The new size must be smaller
  * than the current size.  This routine can be used both for the attribute and
@@ -1602,15 +1601,7 @@ xfs_itruncate_extents(
        if (error)
                goto out;
 
-       /*
-        * Clear the reflink flag if there are no data fork blocks and
-        * there are no extents staged in the cow fork.
-        */
-       if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-               if (ip->i_d.di_nblocks == 0)
-                       ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-               xfs_inode_clear_cowblocks_tag(ip);
-       }
+       xfs_itruncate_clear_reflink_flags(ip);
 
        /*
         * Always re-log the inode so that our permanent transaction can keep
index cc13c37637217e74e4c9425b34a710a5fd55e090..d383e392ec9ddcca6f552dc8c4cfe5329373d6c9 100644 (file)
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * log recovery to replay a bmap operation on the inode.
  */
 #define XFS_IRECOVERY          (1 << 11)
+#define XFS_ICOWBLOCKS         (1 << 12)/* has the cowblocks tag set */
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -428,7 +429,7 @@ xfs_extlen_t        xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t   xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int            xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-                              xfs_nlink_t, dev_t, prid_t, int,
+                              xfs_nlink_t, dev_t, prid_t,
                               struct xfs_inode **, int *);
 
 /* from xfs_file.c */
index 33eb4fb2e3fd87b0848ebe2599b0a71b05f0d082..7ab52a8bc0a9e6dff904fe10b097eb3a478db9b9 100644 (file)
@@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(
 
        ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
-                              &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+                              &nimaps, XFS_BMAPI_ATTRFORK);
 out_unlock:
        xfs_iunlock(ip, lockmode);
 
index 010a13a201aad78382ae69fdcc8e3b8d5c451c1d..ec952dfad359f6ad08d33d234f0cd75200c5933b 100644 (file)
@@ -793,8 +793,8 @@ xfs_qm_qino_alloc(
                return error;
 
        if (need_alloc) {
-               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
-                                                               &committed);
+               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
+                               &committed);
                if (error) {
                        xfs_trans_cancel(tp);
                        return error;
index cc041a29eb70bbb7524e036c0e24843099480617..47aea2e82c268f4bbf9c25c1c1c6f3821c11caa3 100644 (file)
@@ -49,8 +49,6 @@
 #include "xfs_alloc.h"
 #include "xfs_quota_defs.h"
 #include "xfs_quota.h"
-#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
 #include "xfs_iomap.h"
 #include "xfs_rmap_btree.h"
@@ -456,6 +454,8 @@ retry:
        if (error)
                goto out_bmap_cancel;
 
+       xfs_inode_set_cowblocks_tag(ip);
+
        /* Finish up. */
        error = xfs_defer_finish(&tp, &dfops);
        if (error)
@@ -492,8 +492,9 @@ xfs_reflink_find_cow_mapping(
        struct xfs_iext_cursor          icur;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-       ASSERT(xfs_is_reflink_inode(ip));
 
+       if (!xfs_is_reflink_inode(ip))
+               return false;
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
                return false;
@@ -612,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
 
                        /* Remove the mapping from the CoW fork. */
                        xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+               } else {
+                       /* Didn't do anything, push cursor back. */
+                       xfs_iext_prev(ifp, &icur);
                }
 next_extent:
                if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -727,7 +731,7 @@ xfs_reflink_end_cow(
                        (unsigned int)(end_fsb - offset_fsb),
                        XFS_DATA_FORK);
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                       resblks, 0, 0, &tp);
+                       resblks, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                goto out;
 
@@ -1293,6 +1297,17 @@ xfs_reflink_remap_range(
 
        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
+       /*
+        * Clear out post-eof preallocations because we don't have page cache
+        * backing the delayed allocations and they'll never get freed on
+        * their own.
+        */
+       if (xfs_can_free_eofblocks(dest, true)) {
+               ret = xfs_free_eofblocks(dest);
+               if (ret)
+                       goto out_unlock;
+       }
+
        /* Set flags and remap blocks. */
        ret = xfs_reflink_set_inode_flag(src, dest);
        if (ret)
index 5122d3021117f00e20d6dd1e195c28666cc71076..1dacccc367f81725a678ea3a6ed50528a731920d 100644 (file)
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        return error;
                }
+               xfs_queue_cowblocks(mp);
 
                /* Create the per-AG metadata reservation pool. */
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
 
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+               /* Get rid of any leftover CoW reservations... */
+               cancel_delayed_work_sync(&mp->m_cowblocks_work);
+               error = xfs_icache_free_cowblocks(mp, NULL);
+               if (error) {
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                       return error;
+               }
+
                /* Free the per-AG metadata reservation pool. */
                error = xfs_fs_unreserve_ag_blocks(mp);
                if (error) {
index 68d3ca2c4968054646345dab2bd3a76f97490fb8..2e9e793a8f9dfa18e87078bce5133860d4de6d4d 100644 (file)
@@ -232,11 +232,6 @@ xfs_symlink(
        resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
 
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
-       if (error == -ENOSPC && fs_blocks == 0) {
-               resblks = 0;
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
-                               &tp);
-       }
        if (error)
                goto out_release_inode;
 
@@ -259,14 +254,6 @@ xfs_symlink(
        if (error)
                goto out_trans_cancel;
 
-       /*
-        * Check for ability to enter directory entry, if no space reserved.
-        */
-       if (!resblks) {
-               error = xfs_dir_canenter(tp, dp, link_name);
-               if (error)
-                       goto out_trans_cancel;
-       }
        /*
         * Initialize the bmap freelist prior to calling either
         * bmapi or the directory create code.
@@ -277,7 +264,7 @@ xfs_symlink(
         * Allocate an inode for the symlink.
         */
        error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-                              prid, resblks > 0, &ip, NULL);
+                              prid, &ip, NULL);
        if (error)
                goto out_trans_cancel;
 
index 5d95fe34829438a0a2486bc50278d84b0d0714bf..35f3546b6af5237a78a9fe1bd3aa07cbb8503349 100644 (file)
@@ -24,7 +24,6 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_da_btree.h"
index ea189d88a3cc761e0239043785e00d9c6d8402f5..8ac4e68a12f08e4e00c1fcf850f0f8c97188d390 100644 (file)
@@ -7,9 +7,10 @@
 #ifndef _ASM_GENERIC_MM_HOOKS_H
 #define _ASM_GENERIC_MM_HOOKS_H
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
index b234d54f2cb6e4c23a21db2af3b225264eccae2a..868e68561f913ecaec80ddf05f02816767ea5a17 100644 (file)
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 struct file;
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                        unsigned long size, pgprot_t *vma_prot);
+
+#ifndef CONFIG_X86_ESPFIX64
+static inline void init_espfix_bsp(void) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
index f0b44c16e88f241721a4296019475abae6b7a3b0..c2bae8da642cbaef97f3de444a446a27df15dc18 100644 (file)
@@ -82,6 +82,14 @@ int ahash_register_instance(struct crypto_template *tmpl,
                            struct ahash_instance *inst);
 void ahash_free_instance(struct crypto_instance *inst);
 
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+                   unsigned int keylen);
+
+static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+       return alg->setkey != shash_no_setkey;
+}
+
 int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
                            struct hash_alg_common *alg,
                            struct crypto_instance *inst);
index cceafa01f9073293cf4e813247b54bbe094728e3..b67404fc4b34bab495086b4b1e969f53a0921b39 100644 (file)
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
 
 struct mcryptd_cpu_queue {
        struct crypto_queue queue;
+       spinlock_t q_lock;
        struct work_struct work;
 };
 
index a4649c56ca2ffaa3a428040ebf97772c0fa18fc1..5971577016a2b09bd484883e609860248ec57387 100644 (file)
@@ -24,6 +24,7 @@
 #define __DRM_CONNECTOR_H__
 
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/ctype.h>
 #include <linux/hdmi.h>
 #include <drm/drm_mode_object.h>
@@ -918,12 +919,13 @@ struct drm_connector {
        uint16_t tile_h_size, tile_v_size;
 
        /**
-        * @free_work:
+        * @free_node:
         *
-        * Work used only by &drm_connector_iter to be able to clean up a
-        * connector from any context.
+        * List used only by &drm_connector_iter to be able to clean up a
+        * connector from any context, in conjunction with
+        * &drm_mode_config.connector_free_work.
         */
-       struct work_struct free_work;
+       struct llist_node free_node;
 };
 
 #define obj_to_connector(x) container_of(x, struct drm_connector, base)
index 2ec41d032e560f0fa4d04ef8d23d6b3442529473..efe6d5a8e834168a82ded86e7c93341d93305b2d 100644 (file)
@@ -465,6 +465,8 @@ struct edid *drm_get_edid(struct drm_connector *connector,
 struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
                                     struct i2c_adapter *adapter);
 struct edid *drm_edid_duplicate(const struct edid *edid);
+void drm_reset_display_info(struct drm_connector *connector);
+u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid);
 int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
 
 u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
index b21e827c5c78775742533d28f3baebfd6e0a9b5e..b0ce26d71296df77c835c6520a43c99b3b93b363 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
+#include <linux/llist.h>
 
 #include <drm/drm_modeset_lock.h>
 
@@ -393,7 +394,7 @@ struct drm_mode_config {
 
        /**
         * @connector_list_lock: Protects @num_connector and
-        * @connector_list.
+        * @connector_list and @connector_free_list.
         */
        spinlock_t connector_list_lock;
        /**
@@ -413,6 +414,21 @@ struct drm_mode_config {
         * &struct drm_connector_list_iter to walk this list.
         */
        struct list_head connector_list;
+       /**
+        * @connector_free_list:
+        *
+        * List of connector objects linked with &drm_connector.free_node.
+        * Protected by @connector_list_lock. Used by
+        * drm_for_each_connector_iter() and
+        * &struct drm_connector_list_iter to safely free connectors using
+        * @connector_free_work.
+        */
+       struct llist_head connector_free_list;
+       /**
+        * @connector_free_work: Work to clean up @connector_free_list.
+        */
+       struct work_struct connector_free_work;
+
        /**
         * @num_encoder:
         *
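
The two hunks above replace the per-connector work item with a lock-free llist that a single cleanup work item drains. As a rough illustration of that pattern (not the DRM code itself), the userspace sketch below uses C11 atomics as stand-ins for llist_add()/llist_del_all(); the node type, the names and the "worker" loop are invented for the example.

/*
 * Minimal sketch of the llist + work pattern: producers push nodes onto a
 * lock-free singly linked list from any context, and one worker later
 * grabs the whole list at once and frees it.
 */
#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static _Atomic(struct node *) free_list = NULL;

static void push_free(struct node *n)		/* ~ llist_add() */
{
	struct node *old = atomic_load(&free_list);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&free_list, &old, n));
}

static void free_worker(void)			/* ~ connector_free_work */
{
	/* take the whole list in one shot, ~ llist_del_all() */
	struct node *n = atomic_exchange(&free_list, NULL);

	while (n) {
		struct node *next = n->next;

		printf("freeing node %d\n", n->id);
		free(n);
		n = next;
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->id = i;
		push_free(n);
	}
	free_worker();
	return 0;
}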
index 6e45608b2399813329e2280e601c2465940def53..9da6ce22803f03fc318a7fdd33af380eae67d4e6 100644 (file)
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
        bool                    enabled;
 };
 
-int kvm_timer_hyp_init(void);
+int kvm_timer_hyp_init(bool);
 int kvm_timer_enable(struct kvm_vcpu *vcpu);
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
index 82f0c8fd7be8fd20951af806319f64f615f4ffc6..23d29b39f71e83e8a6a25540adc2e3f28702aec7 100644 (file)
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #define bio_set_dev(bio, bdev)                         \
 do {                                           \
+       if ((bio)->bi_disk != (bdev)->bd_disk)  \
+               bio_clear_flag(bio, BIO_THROTTLED);\
        (bio)->bi_disk = (bdev)->bd_disk;       \
        (bio)->bi_partno = (bdev)->bd_partno;   \
 } while (0)
index a1e628e032dad75bf1837a25e45b55a7f54ca2df..9e7d8bd776d227d2ba92b137af7230300f5b1d4a 100644 (file)
@@ -50,8 +50,6 @@ struct blk_issue_stat {
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct gendisk          *bi_disk;
-       u8                      bi_partno;
-       blk_status_t            bi_status;
        unsigned int            bi_opf;         /* bottom bits req flags,
                                                 * top bits REQ_OP. Use
                                                 * accessors.
@@ -59,8 +57,8 @@ struct bio {
        unsigned short          bi_flags;       /* status, etc and bvec pool number */
        unsigned short          bi_ioprio;
        unsigned short          bi_write_hint;
-
-       struct bvec_iter        bi_iter;
+       blk_status_t            bi_status;
+       u8                      bi_partno;
 
        /* Number of segments in this BIO after
         * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
        unsigned int            bi_seg_front_size;
        unsigned int            bi_seg_back_size;
 
-       atomic_t                __bi_remaining;
+       struct bvec_iter        bi_iter;
 
+       atomic_t                __bi_remaining;
        bio_end_io_t            *bi_end_io;
 
        void                    *bi_private;
index 8089ca17db9ac65998ec9cf82f65743bb5c5abb9..0ce8a372d5069a7aca7810429a968d20e923d3d1 100644 (file)
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
 struct request {
        struct list_head queuelist;
        union {
-               call_single_data_t csd;
+               struct __call_single_data csd;
                u64 fifo_time;
        };
 
@@ -241,14 +241,24 @@ struct request {
        struct request *next_rq;
 };
 
+static inline bool blk_op_is_scsi(unsigned int op)
+{
+       return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_op_is_private(unsigned int op)
+{
+       return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
+}
+
 static inline bool blk_rq_is_scsi(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+       return blk_op_is_scsi(req_op(rq));
 }
 
 static inline bool blk_rq_is_private(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+       return blk_op_is_private(req_op(rq));
 }
 
 static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
        return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 }
 
+static inline bool bio_is_passthrough(struct bio *bio)
+{
+       unsigned op = bio_op(bio);
+
+       return blk_op_is_scsi(op) || blk_op_is_private(op);
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
        return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
-extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
+extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
index c561b986bab0ebf886000ea34e377ea789138a3b..1632bb13ad8aed8cfeba2ccc69cfa458d02540bb 100644 (file)
  * In practice this is far bigger than any realistic pointer offset; this limit
  * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
  */
-#define BPF_MAX_VAR_OFF        (1ULL << 31)
+#define BPF_MAX_VAR_OFF        (1 << 29)
 /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
  * that converting umax_value to int cannot overflow.
  */
-#define BPF_MAX_VAR_SIZ        INT_MAX
+#define BPF_MAX_VAR_SIZ        (1 << 29)
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
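
A quick sanity check on the new limits (my arithmetic, not part of the patch): with BPF_MAX_VAR_OFF and BPF_MAX_VAR_SIZ both capped at 1 << 29 = 536870912, even a sum of three such quantities is at most 3 * 2^29 = 1610612736, which is still below INT_MAX (2147483647). So expressions like umax_value + (int)off + (int)size stay representable in 32-bit signed arithmetic, and certainly cannot overflow a u64.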
index 188ed9f65517453d5bb97f2466167c069a7c0ae2..52e611ab9a6cf6fde23dae53784f01bfb06ce448 100644 (file)
@@ -220,21 +220,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 /*
  * Prevent the compiler from merging or refetching reads or writes. The
  * compiler is also forbidden from reordering successive instances of
- * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
- * compiler is aware of some particular ordering.  One way to make the
- * compiler aware of ordering is to put the two invocations of READ_ONCE,
- * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
  *
- * In contrast to ACCESS_ONCE these two macros will also work on aggregate
- * data types like structs or unions. If the size of the accessed data
- * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at
- * least two memcpy()s: one for the __builtin_memcpy() and then one for
- * the macro doing the copy of variable - '__u' allocated on the stack.
+ * These two macros will also work on aggregate data types like structs or
+ * unions. If the size of the accessed data type exceeds the word size of
+ * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
+ * fall back to memcpy(). There's at least two memcpy()s: one for the
+ * __builtin_memcpy() and then one for the macro doing the copy of variable
+ * - '__u' allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
  * mutilate accesses that either do not require ordering or that interact
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
@@ -327,29 +327,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        compiletime_assert(__native_word(t),                            \
                "Need native word sized stores/loads for atomicity.")
 
-/*
- * Prevent the compiler from merging or refetching accesses.  The compiler
- * is also forbidden from reordering successive instances of ACCESS_ONCE(),
- * but only when the compiler is aware of some particular ordering.  One way
- * to make the compiler aware of ordering is to put the two invocations of
- * ACCESS_ONCE() in different C statements.
- *
- * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE
- * on a union member will work as long as the size of the member matches the
- * size of the union and the size is smaller than word size.
- *
- * The major use cases of ACCESS_ONCE used to be (1) Mediating communication
- * between process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
- * mutilate accesses that either do not require ordering or that interact
- * with an explicit memory barrier or atomic instruction that provides the
- * required ordering.
- *
- * If possible use READ_ONCE()/WRITE_ONCE() instead.
- */
-#define __ACCESS_ONCE(x) ({ \
-        __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
-       (volatile typeof(x) *)&(x); })
-#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x))
-
 #endif /* __LINUX_COMPILER_H */
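
With ACCESS_ONCE() removed by the hunks above, the remaining annotations are READ_ONCE() and WRITE_ONCE(): reads of ACCESS_ONCE(x) become READ_ONCE(x) and assignments become WRITE_ONCE(x, v). The userspace sketch below shows the flag-polling use case the comment describes; the two macros here are simplified scalar-only stand-ins (plain volatile casts), not the kernel's full definitions, and the signal handler stands in for an interrupt handler.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

/* simplified, scalar-only stand-ins for the kernel macros */
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))

static int stop_requested;

static void on_sigint(int sig)
{
	(void)sig;
	WRITE_ONCE(stop_requested, 1);	/* the "irq handler" side */
}

int main(void)
{
	signal(SIGINT, on_sigint);

	/*
	 * Without READ_ONCE() the compiler could hoist the load out of the
	 * loop and spin forever on a cached value.
	 */
	while (!READ_ONCE(stop_requested))
		usleep(1000);

	puts("stopping");
	return 0;
}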
index 0662a417febe34fb9e638857f13a6d52fcaf0d49..94a59ba7d422f4d3b4a53314d99fa4a1367fbcc4 100644 (file)
@@ -10,9 +10,6 @@
  */
 
 #include <linux/wait.h>
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-#include <linux/lockdep.h>
-#endif
 
 /*
  * struct completion - structure used to maintain state for a "completion"
 struct completion {
        unsigned int done;
        wait_queue_head_t wait;
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-       struct lockdep_map_cross map;
-#endif
 };
 
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-static inline void complete_acquire(struct completion *x)
-{
-       lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
-}
-
-static inline void complete_release(struct completion *x)
-{
-       lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
-}
-
-static inline void complete_release_commit(struct completion *x)
-{
-       lock_commit_crosslock((struct lockdep_map *)&x->map);
-}
-
-#define init_completion_map(x, m)                                      \
-do {                                                                   \
-       lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,     \
-                       (m)->name, (m)->key, 0);                                \
-       __init_completion(x);                                           \
-} while (0)
-
-#define init_completion(x)                                             \
-do {                                                                   \
-       static struct lock_class_key __key;                             \
-       lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,     \
-                       "(completion)" #x,                              \
-                       &__key, 0);                                     \
-       __init_completion(x);                                           \
-} while (0)
-#else
 #define init_completion_map(x, m) __init_completion(x)
 #define init_completion(x) __init_completion(x)
 static inline void complete_acquire(struct completion *x) {}
 static inline void complete_release(struct completion *x) {}
 static inline void complete_release_commit(struct completion *x) {}
-#endif
 
-#ifdef CONFIG_LOCKDEP_COMPLETIONS
-#define COMPLETION_INITIALIZER(work) \
-       { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
-       STATIC_CROSS_LOCKDEP_MAP_INIT("(completion)" #work, &(work)) }
-#else
 #define COMPLETION_INITIALIZER(work) \
        { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
-#endif
 
 #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
        (*({ init_completion_map(&(work), &(map)); &(work); }))
index 099058e1178b4d8529438450e28ad03b06497d32..631286535d0f126a13a366b924cae62f58d114e1 100644 (file)
@@ -83,6 +83,7 @@ extern int set_current_groups(struct group_info *);
 extern void set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
 extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
 
 /*
  * The security context of a task
index 7c3a365f7e127ac29cfedc6b5debe81eb87abe74..fa14f834e4ede3a81e7e6182c889fec338b2495f 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/radix-tree.h>
 #include <linux/gfp.h>
 #include <linux/percpu.h>
+#include <linux/bug.h>
 
 struct idr {
        struct radix_tree_root  idr_rt;
similarity index 94%
rename from include/linux/pti.h
rename to include/linux/intel-pti.h
index b3ea01a3197efa5c30768fe00ee0f24aad6941aa..2710d72de3c926c2a4e1be36a921aa23ac107c31 100644 (file)
@@ -22,8 +22,8 @@
  * interface to write out its contents for debugging a mobile system.
  */
 
-#ifndef PTI_H_
-#define PTI_H_
+#ifndef LINUX_INTEL_PTI_H_
+#define LINUX_INTEL_PTI_H_
 
 /* offset for last dword of any PTI message. Part of MIPI P1149.7 */
 #define PTI_LASTDWORD_DTS      0x30
@@ -40,4 +40,4 @@ struct pti_masterchannel *pti_request_masterchannel(u8 type,
                                                    const char *thread_name);
 void pti_release_masterchannel(struct pti_masterchannel *mc);
 
-#endif /*PTI_H_*/
+#endif /* LINUX_INTEL_PTI_H_ */
index cb18c6290ca87290996e636f3eda14eb03d26316..8415bf1a9776245b810c8f92fa16c98eb038a45f 100644 (file)
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
                                                 * 100: prefer care-of address
                                                 */
                                dontfrag:1,
-                               autoflowlabel:1;
+                               autoflowlabel:1,
+                               autoflowlabel_set:1;
        __u8                    min_hopcount;
        __u8                    tclass;
        __be32                  rcv_flowinfo;
index a842551fe0449a38df5510a39d3fdc4760caa0ee..2e75dc34bff5cd3e468571918329793eaf7a0911 100644 (file)
@@ -158,12 +158,6 @@ struct lockdep_map {
        int                             cpu;
        unsigned long                   ip;
 #endif
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-       /*
-        * Whether it's a crosslock.
-        */
-       int                             cross;
-#endif
 };
 
 static inline void lockdep_copy_map(struct lockdep_map *to,
@@ -267,95 +261,8 @@ struct held_lock {
        unsigned int hardirqs_off:1;
        unsigned int references:12;                                     /* 32 bits */
        unsigned int pin_count;
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-       /*
-        * Generation id.
-        *
-        * A value of cross_gen_id will be stored when holding this,
-        * which is globally increased whenever each crosslock is held.
-        */
-       unsigned int gen_id;
-#endif
-};
-
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-#define MAX_XHLOCK_TRACE_ENTRIES 5
-
-/*
- * This is for keeping locks waiting for commit so that true dependencies
- * can be added at commit step.
- */
-struct hist_lock {
-       /*
-        * Id for each entry in the ring buffer. This is used to
-        * decide whether the ring buffer was overwritten or not.
-        *
-        * For example,
-        *
-        *           |<----------- hist_lock ring buffer size ------->|
-        *           pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii
-        * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii.......................
-        *
-        *           where 'p' represents an acquisition in process
-        *           context, 'i' represents an acquisition in irq
-        *           context.
-        *
-        * In this example, the ring buffer was overwritten by
-        * acquisitions in irq context, that should be detected on
-        * rollback or commit.
-        */
-       unsigned int hist_id;
-
-       /*
-        * Seperate stack_trace data. This will be used at commit step.
-        */
-       struct stack_trace      trace;
-       unsigned long           trace_entries[MAX_XHLOCK_TRACE_ENTRIES];
-
-       /*
-        * Seperate hlock instance. This will be used at commit step.
-        *
-        * TODO: Use a smaller data structure containing only necessary
-        * data. However, we should make lockdep code able to handle the
-        * smaller one first.
-        */
-       struct held_lock        hlock;
 };
 
-/*
- * To initialize a lock as crosslock, lockdep_init_map_crosslock() should
- * be called instead of lockdep_init_map().
- */
-struct cross_lock {
-       /*
-        * When more than one acquisition of crosslocks are overlapped,
-        * we have to perform commit for them based on cross_gen_id of
-        * the first acquisition, which allows us to add more true
-        * dependencies.
-        *
-        * Moreover, when no acquisition of a crosslock is in progress,
-        * we should not perform commit because the lock might not exist
-        * any more, which might cause incorrect memory access. So we
-        * have to track the number of acquisitions of a crosslock.
-        */
-       int nr_acquire;
-
-       /*
-        * Seperate hlock instance. This will be used at commit step.
-        *
-        * TODO: Use a smaller data structure containing only necessary
-        * data. However, we should make lockdep code able to handle the
-        * smaller one first.
-        */
-       struct held_lock        hlock;
-};
-
-struct lockdep_map_cross {
-       struct lockdep_map map;
-       struct cross_lock xlock;
-};
-#endif
-
 /*
  * Initialization, self-test and debugging-output methods:
  */
@@ -560,37 +467,6 @@ enum xhlock_context_t {
        XHLOCK_CTX_NR,
 };
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-extern void lockdep_init_map_crosslock(struct lockdep_map *lock,
-                                      const char *name,
-                                      struct lock_class_key *key,
-                                      int subclass);
-extern void lock_commit_crosslock(struct lockdep_map *lock);
-
-/*
- * What we essencially have to initialize is 'nr_acquire'. Other members
- * will be initialized in add_xlock().
- */
-#define STATIC_CROSS_LOCK_INIT() \
-       { .nr_acquire = 0,}
-
-#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \
-       { .map.name = (_name), .map.key = (void *)(_key), \
-         .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), }
-
-/*
- * To initialize a lockdep_map statically use this macro.
- * Note that _name must not be NULL.
- */
-#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
-       { .name = (_name), .key = (void *)(_key), .cross = 0, }
-
-extern void crossrelease_hist_start(enum xhlock_context_t c);
-extern void crossrelease_hist_end(enum xhlock_context_t c);
-extern void lockdep_invariant_state(bool force);
-extern void lockdep_init_task(struct task_struct *task);
-extern void lockdep_free_task(struct task_struct *task);
-#else /* !CROSSRELEASE */
 #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
 /*
  * To initialize a lockdep_map statically use this macro.
@@ -604,7 +480,6 @@ static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
 static inline void lockdep_invariant_state(bool force) {}
 static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}
-#endif /* CROSSRELEASE */
 
 #ifdef CONFIG_LOCK_STAT
 
index a2a1318a3d0c8be0a1fb3d1a08fcf671ff9d8bee..c3d3f04d8cc689eddf217c0626e71d8c16530db5 100644 (file)
@@ -915,10 +915,10 @@ enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
 #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN        BIT(6)
 
 enum dev_aspm_mode {
-       DEV_ASPM_DISABLE = 0,
        DEV_ASPM_DYNAMIC,
        DEV_ASPM_BACKDOOR,
        DEV_ASPM_STATIC,
+       DEV_ASPM_DISABLE,
 };
 
 /*
index a886b51511abbf146c4c76309aa313e4bbf77dda..57b109c6e422784dd0498c46f9a1a6e96f3ef0ca 100644 (file)
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
 };
 
 struct mlx5_irq_info {
+       cpumask_var_t mask;
        char name[MLX5_MAX_IRQ_NAME];
 };
 
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
-int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
index 38a7577a9ce71fbcf63c21e2911364795842daa8..d44ec5f41d4a04c72b25b4db1d6fb0217f8f1fa1 100644 (file)
@@ -147,7 +147,7 @@ enum {
        MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
        MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
        MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
-       MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
+       MLX5_CMD_OP_SET_PP_RATE_LIMIT             = 0x780,
        MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
        MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
        MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
        u8         vxlan_udp_port[0x10];
 };
 
-struct mlx5_ifc_set_rate_limit_out_bits {
+struct mlx5_ifc_set_pp_rate_limit_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
 
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
        u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_set_rate_limit_in_bits {
+struct mlx5_ifc_set_pp_rate_limit_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
 
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
        u8         reserved_at_60[0x20];
 
        u8         rate_limit[0x20];
+
+       u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_access_register_out_bits {
index 01c91d874a57f5edc7f4698b57918402a211d707..5bad038ac012e6e3047fd6a63a625c6814966d51 100644 (file)
@@ -66,6 +66,15 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk)
        return tsk->signal->oom_mm;
 }
 
+/*
+ * Use this helper if tsk->mm != mm and the victim mm needs special
+ * handling. This is guaranteed to stay true once set.
+ */
+static inline bool mm_is_oom_victim(struct mm_struct *mm)
+{
+       return test_bit(MMF_OOM_VICTIM, &mm->flags);
+}
+
 /*
  * Checks whether a page fault on the given mm is still reliable.
  * This is no longer true if the oom reaper started to reap the
index 0403894147a3ca970ff7248549919693b53d9e8e..c170c9250c8b706e62e00f4b6a26149735bacba5 100644 (file)
@@ -1674,6 +1674,9 @@ static inline struct pci_dev *pci_get_slot(struct pci_bus *bus,
 static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
                                                unsigned int devfn)
 { return NULL; }
+static inline struct pci_dev *pci_get_domain_bus_and_slot(int domain,
+                                       unsigned int bus, unsigned int devfn)
+{ return NULL; }
 
 static inline int pci_domain_nr(struct pci_bus *bus) { return 0; }
 static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; }
index 65d39115f06d8780399e469ed44fc1c90f074e69..492ed473ba7e440ad422c3a5c16833ef47e66374 100644 (file)
@@ -765,6 +765,7 @@ extern int pm_generic_poweroff_late(struct device *dev);
 extern int pm_generic_poweroff(struct device *dev);
 extern void pm_generic_complete(struct device *dev);
 
+extern void dev_pm_skip_next_resume_phases(struct device *dev);
 extern bool dev_pm_smart_suspend_and_suspended(struct device *dev);
 
 #else /* !CONFIG_PM_SLEEP */
index 37b4bb2545b32dc82633df6b76e629b9999f900b..6866df4f31b59da506d56b3db29501a56d813635 100644 (file)
@@ -101,12 +101,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r)
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
  * for example cpu_relax(). Callers must hold producer_lock.
+ * Callers are responsible for making sure the pointer that is being queued
+ * points to valid data.
  */
 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
        if (unlikely(!r->size) || r->queue[r->producer])
                return -ENOSPC;
 
+       /* Make sure the pointer we are storing points to valid data. */
+       /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
+       smp_wmb();
+
        r->queue[r->producer++] = ptr;
        if (unlikely(r->producer >= r->size))
                r->producer = 0;
@@ -275,6 +281,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
        if (ptr)
                __ptr_ring_discard_one(r);
 
+       /* Make sure anyone accessing data through the pointer is up to date. */
+       /* Pairs with smp_wmb in __ptr_ring_produce. */
+       smp_read_barrier_depends();
        return ptr;
 }
 
index d574361943ea832532cf93177e97a8c62d6c3430..fcbeed4053efbba81993fea4151a8a6a6cd6f5d1 100644 (file)
@@ -99,6 +99,8 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
                            struct rb_root *root);
 extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
                                struct rb_root *root);
+extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
+                                  struct rb_root_cached *root);
 
 static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
                                struct rb_node **rb_link)
index cc0072e93e360722f40a19928c001a1feab272bb..857a72ceb794252eb8cb22c55ef85d17453c34a0 100644 (file)
@@ -10,9 +10,6 @@
  */
 typedef struct {
        arch_rwlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-       unsigned int break_lock;
-#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
        unsigned int magic, owner_cpu;
        void *owner;
index 21991d668d35231e625b95d99e2bd6826ba6a47e..d2588263a9893caa04d8854607207c41080927cc 100644 (file)
@@ -849,17 +849,6 @@ struct task_struct {
        struct held_lock                held_locks[MAX_LOCK_DEPTH];
 #endif
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-#define MAX_XHLOCKS_NR 64UL
-       struct hist_lock *xhlocks; /* Crossrelease history locks */
-       unsigned int xhlock_idx;
-       /* For restoring at history boundaries */
-       unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
-       unsigned int hist_id;
-       /* For overwrite check at each context exit */
-       unsigned int hist_id_save[XHLOCK_CTX_NR];
-#endif
-
 #ifdef CONFIG_UBSAN
        unsigned int                    in_ubsan;
 #endif
@@ -1503,7 +1492,11 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from)
        __set_task_comm(tsk, from, false);
 }
 
-extern char *get_task_comm(char *to, struct task_struct *tsk);
+extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
+#define get_task_comm(buf, tsk) ({                     \
+       BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);     \
+       __get_task_comm(buf, sizeof(buf), tsk);         \
+})
 
 #ifdef CONFIG_SMP
 void scheduler_ipi(void);
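
The new get_task_comm() wrapper only builds when it is handed a real TASK_COMM_LEN-sized array, because sizeof(buf) evaluates to the pointer size otherwise and trips BUILD_BUG_ON(). The userspace sketch below reproduces that sizeof() trick with simplified stand-ins for BUILD_BUG_ON() and __get_task_comm(); the task-name source is just a string here.

#include <string.h>
#include <stdio.h>

#define TASK_COMM_LEN 16
/* simplified stand-in for the kernel's BUILD_BUG_ON() */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

static char *__get_task_comm(char *to, size_t len, const char *src)
{
	/* stand-in for the kernel helper: bounded copy of the task name */
	strncpy(to, src, len - 1);
	to[len - 1] = '\0';
	return to;
}

#define get_task_comm(buf, src) ({			\
	BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN);	\
	__get_task_comm(buf, sizeof(buf), src);		\
})

int main(void)
{
	char comm[TASK_COMM_LEN];

	puts(get_task_comm(comm, "kworker/0:1"));

	/*
	 * char *p = comm;
	 * get_task_comm(p, "x");	<-- fails to build: sizeof(p) is the
	 *				    pointer size, not TASK_COMM_LEN
	 */
	return 0;
}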
index 9c8847395b5e15347e150d9b99337a70217b97db..ec912d01126f4b01f5ac61ebae7f73f8e0527d23 100644 (file)
@@ -70,6 +70,7 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_UNSTABLE           22      /* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE     23      /* mm has ever used the global huge zero page */
 #define MMF_DISABLE_THP                24      /* disable THP for all VMAs */
+#define MMF_OOM_VICTIM         25      /* mm is the oom victim */
 #define MMF_DISABLE_THP_MASK   (1 << MMF_DISABLE_THP)
 
 #define MMF_INIT_MASK          (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
index 7b2170bfd6e7dae432478fffdbc70e1408740394..bc6bb325d1bf7c03db223c568b891e5d33dc93ca 100644 (file)
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  *     for that name.  This appears in the sysfs "modalias" attribute
  *     for driver coldplugging, and in uevents used for hotplugging
  * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
- *     when not using a GPIO line)
+ *     not using a GPIO line)
  *
  * @statistics: statistics for the spi_device
  *
index a39186194cd6782ac0b6705f02af6179a86f7813..3bf273538840103c7b6c634b148b8c74ce754843 100644 (file)
@@ -107,16 +107,11 @@ do {                                                              \
 
 #define raw_spin_is_locked(lock)       arch_spin_is_locked(&(lock)->raw_lock)
 
-#ifdef CONFIG_GENERIC_LOCKBREAK
-#define raw_spin_is_contended(lock) ((lock)->break_lock)
-#else
-
 #ifdef arch_spin_is_contended
 #define raw_spin_is_contended(lock)    arch_spin_is_contended(&(lock)->raw_lock)
 #else
 #define raw_spin_is_contended(lock)    (((void)(lock), 0))
 #endif /*arch_spin_is_contended*/
-#endif
 
 /*
  * This barrier must provide two things:
index 73548eb13a5ddc82ea16c6a292a8d704471eb7ac..24b4e6f2c1a22fe2a6717f4d6076ea87d595994b 100644 (file)
@@ -19,9 +19,6 @@
 
 typedef struct raw_spinlock {
        arch_spinlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-       unsigned int break_lock;
-#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
        unsigned int magic, owner_cpu;
        void *owner;
index 410ecf17de3ce591017e36a95f28de18ae54dcc1..cfd83eb2f926c74622f46ed931bb1c58277df49f 100644 (file)
@@ -259,7 +259,10 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
 {
        __kernel_size_t ret;
        size_t p_size = __builtin_object_size(p, 0);
-       if (p_size == (size_t)-1)
+
+       /* Work around gcc excess stack consumption issue */
+       if (p_size == (size_t)-1 ||
+           (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
                return __builtin_strlen(p);
        ret = strnlen(p, p_size);
        if (p_size <= ret)
index d24991c1fef33343d7b7bc65d6b9ec67d8d9f575..b95ffb2188abaaac85dc992c6545d33b946495d1 100644 (file)
@@ -18,7 +18,7 @@
  */
 struct trace_export {
        struct trace_export __rcu       *next;
-       void (*write)(const void *, unsigned int);
+       void (*write)(struct trace_export *, const void *, unsigned int);
 };
 
 int register_ftrace_export(struct trace_export *export);
index 8b8118a7fadbc74a9aa879dc934de0442bb3a013..cb4d92b79cd932eda4e178861d8345683b329bdb 100644 (file)
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
  * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
  *     auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
- * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
  * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
  *     firmware.
  * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
index 2fdb29ca74c2ab26068c57e0134e58b35019ac0e..fdad41469b6521bed2d666cf1cf7925579ec8111 100644 (file)
@@ -44,10 +44,10 @@ struct guehdr {
 #else
 #error  "Please fix <asm/byteorder.h>"
 #endif
-                       __u8    proto_ctype;
-                       __u16   flags;
+                       __u8    proto_ctype;
+                       __be16  flags;
                };
-               __u32 word;
+               __be32  word;
        };
 };
 
@@ -84,11 +84,10 @@ static inline size_t guehdr_priv_flags_len(__be32 flags)
  * if there is an unknown standard or private flags, or the options length for
  * the flags exceeds the options length specific in hlen of the GUE header.
  */
-static inline int validate_gue_flags(struct guehdr *guehdr,
-                                    size_t optlen)
+static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen)
 {
+       __be16 flags = guehdr->flags;
        size_t len;
-       __be32 flags = guehdr->flags;
 
        if (flags & ~GUE_FLAGS_ALL)
                return 1;
@@ -101,12 +100,13 @@ static inline int validate_gue_flags(struct guehdr *guehdr,
                /* Private flags are last four bytes accounted in
                 * guehdr_flags_len
                 */
-               flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV);
+               __be32 pflags = *(__be32 *)((void *)&guehdr[1] +
+                                           len - GUE_LEN_PRIV);
 
-               if (flags & ~GUE_PFLAGS_ALL)
+               if (pflags & ~GUE_PFLAGS_ALL)
                        return 1;
 
-               len += guehdr_priv_flags_len(flags);
+               len += guehdr_priv_flags_len(pflags);
                if (len > optlen)
                        return 1;
        }
index 9896f46cbbf11235395d75a5ec18a14736ee099d..af8addbaa3c188a896b74ff9646b6fdd692d1c8e 100644 (file)
@@ -34,6 +34,7 @@
 #include <net/flow_dissector.h>
 
 #define IPV4_MAX_PMTU          65535U          /* RFC 2675, Section 5.1 */
+#define IPV4_MIN_MTU           68                      /* RFC 791 */
 
 struct sock;
 
index 0105445cab83d32008b3526794c077f4bfbd9816..8e08b6da72f325bd4a623191e886fb1b746644d7 100644 (file)
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
 };
 
 enum tc_clsbpf_command {
-       TC_CLSBPF_ADD,
-       TC_CLSBPF_REPLACE,
-       TC_CLSBPF_DESTROY,
+       TC_CLSBPF_OFFLOAD,
        TC_CLSBPF_STATS,
 };
 
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
        enum tc_clsbpf_command command;
        struct tcf_exts *exts;
        struct bpf_prog *prog;
+       struct bpf_prog *oldprog;
        const char *name;
        bool exts_integrated;
        u32 gen_flags;
index 65d0d25f2648f645bad707c348cbb6454ef75cd5..83a3e47d5845b99fa61799a15b29e7247d478c72 100644 (file)
@@ -71,6 +71,7 @@ struct Qdisc {
                                      * qdisc_tree_decrease_qlen() should stop.
                                      */
 #define TCQ_F_INVISIBLE                0x80 /* invisible by default in dump */
+#define TCQ_F_OFFLOADED                0x200 /* qdisc is offloaded to HW */
        u32                     limit;
        const struct Qdisc_ops  *ops;
        struct qdisc_size_table __rcu *stab;
index 758607226bfdd5269d33557b650a31628a59d22f..2cd449328aee37e55de94633d803d73fe3057f9e 100644 (file)
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
 
        TP_STRUCT__entry(
                __string(        name,           core->name                )
-               __string(        pname,          parent->name              )
+               __string(        pname, parent ? parent->name : "none"     )
        ),
 
        TP_fast_assign(
                __assign_str(name, core->name);
-               __assign_str(pname, parent->name);
+               __assign_str(pname, parent ? parent->name : "none");
        ),
 
        TP_printk("%s %s", __get_str(name), __get_str(pname))
index e4b0b8e099325f2801e4f3af168004c603c23794..2c735a3e66133fc08740b4df6d64919c491c9d1e 100644 (file)
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
        { KVM_TRACE_MMIO_WRITE, "write" }
 
 TRACE_EVENT(kvm_mmio,
-       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_PROTO(int type, int len, u64 gpa, void *val),
        TP_ARGS(type, len, gpa, val),
 
        TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
                __entry->type           = type;
                __entry->len            = len;
                __entry->gpa            = gpa;
-               __entry->val            = val;
+               __entry->val            = 0;
+               if (val)
+                       memcpy(&__entry->val, val,
+                              min_t(u32, sizeof(__entry->val), len));
        ),
 
        TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
index f5024c560d8ff028b8952083625f66cde6d77f97..9c4eb33c5a1d35d9fcfa83b8386a7c4be3ef7c9e 100644 (file)
@@ -56,15 +56,18 @@ DEFINE_EVENT(preemptirq_template, preempt_enable,
 
 #include <trace/define_trace.h>
 
-#else /* !CONFIG_PREEMPTIRQ_EVENTS */
+#endif /* !CONFIG_PREEMPTIRQ_EVENTS */
 
+#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING)
 #define trace_irq_enable(...)
 #define trace_irq_disable(...)
-#define trace_preempt_enable(...)
-#define trace_preempt_disable(...)
 #define trace_irq_enable_rcuidle(...)
 #define trace_irq_disable_rcuidle(...)
+#endif
+
+#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT)
+#define trace_preempt_enable(...)
+#define trace_preempt_disable(...)
 #define trace_preempt_enable_rcuidle(...)
 #define trace_preempt_disable_rcuidle(...)
-
 #endif
index af3cc2f4e1ad00dff0e011a4e94e0905d085d0ca..37b5096ae97be4e6115b0941b82918e11250ee6b 100644 (file)
@@ -256,7 +256,6 @@ struct tc_red_qopt {
 #define TC_RED_ECN             1
 #define TC_RED_HARDDROP                2
 #define TC_RED_ADAPTATIVE      4
-#define TC_RED_OFFLOADED       8
 };
 
 struct tc_red_xstats {
index d8b5f80c2ea66dd2d75600c1a556c2bc9cfa101f..843e29aa3cacf0e06beea9b59af5b5c7acc9ca3b 100644 (file)
@@ -557,6 +557,7 @@ enum {
        TCA_PAD,
        TCA_DUMP_INVISIBLE,
        TCA_CHAIN,
+       TCA_HW_OFFLOAD,
        __TCA_MAX
 };
 
index 4914b93a23f2bdeb066a4048d7ab749456ae3fe3..61f410fd74e4cf4180f7ad5ffa1d996cc1528c91 100644 (file)
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+struct resource;
+void arch_xen_balloon_init(struct resource *hostmem_resource);
+#endif
index dfec3809e7404f9658d51a20aae3869cd7aab9c3..7b606fc4848264f3eb52a86bc2f6480585cb5654 100644 (file)
@@ -504,6 +504,8 @@ static void __init mm_init(void)
        pgtable_init();
        vmalloc_init();
        ioremap_huge_init();
+       /* Should be run before the first non-init thread is created */
+       init_espfix_bsp();
 }
 
 asmlinkage __visible void __init start_kernel(void)
@@ -588,6 +590,12 @@ asmlinkage __visible void __init start_kernel(void)
                local_irq_disable();
        radix_tree_init();
 
+       /*
+        * Set up housekeeping before setting up workqueues to allow the unbound
+        * workqueue to take non-housekeeping into account.
+        */
+       housekeeping_init();
+
        /*
         * Allow workqueue creation and work item queueing/cancelling
         * early.  Work item execution depends on kthreads and starts after
@@ -605,7 +613,6 @@ asmlinkage __visible void __init start_kernel(void)
        early_irq_init();
        init_IRQ();
        tick_init();
-       housekeeping_init();
        rcu_init_nohz();
        init_timers();
        hrtimers_init();
@@ -673,10 +680,6 @@ asmlinkage __visible void __init start_kernel(void)
 #ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
                efi_enter_virtual_mode();
-#endif
-#ifdef CONFIG_X86_ESPFIX64
-       /* Should be run before the first non-init thread is created */
-       init_espfix_bsp();
 #endif
        thread_stack_cache_init();
        cred_init();
index e469e05c8e83bc3256378644e3f3c26555651261..3905d4bc5b80d74f0b8f9e2e8f8526a0115ce239 100644 (file)
@@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab)
                pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
                                         htab->map.key_size);
                free_percpu(pptr);
+               cond_resched();
        }
 free_elems:
        bpf_map_area_free(htab->elems);
@@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab)
                        goto free_elems;
                htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
                                  pptr);
+               cond_resched();
        }
 
 skip_percpu_elems:
index d4593571c4049b8d046f53f81f8e17911a21e0c9..04b24876cd23c83c9502afc60853c871ee3fee13 100644 (file)
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                break;
        case PTR_TO_STACK:
                pointer_desc = "stack ";
+               /* The stack spill tracking logic in check_stack_write()
+                * and check_stack_read() relies on stack accesses being
+                * aligned.
+                */
+               strict = true;
                break;
        default:
                break;
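
Forcing strict alignment for PTR_TO_STACK means variable stack accesses must be naturally aligned, because the spill/fill tracking above works in fixed-size slots. A self-contained sketch of the power-of-two alignment test this relies on (plain C, not the verifier's helper):

#include <stdbool.h>
#include <stdio.h>

/* True if off is naturally aligned for a power-of-two access size. */
static bool is_aligned(long off, unsigned int size)
{
        return (off & (size - 1)) == 0;
}

int main(void)
{
        printf("%d\n", is_aligned(-16, 8));     /* 1: a clean 8-byte slot */
        printf("%d\n", is_aligned(-12, 8));     /* 0: would straddle a spill slot */
        return 0;
}
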
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                                           strict);
 }
 
+/* truncate register to smaller size (in bytes)
+ * must be called with size < BPF_REG_SIZE
+ */
+static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
+{
+       u64 mask;
+
+       /* clear high bits in bit representation */
+       reg->var_off = tnum_cast(reg->var_off, size);
+
+       /* fix arithmetic bounds */
+       mask = ((u64)1 << (size * 8)) - 1;
+       if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
+               reg->umin_value &= mask;
+               reg->umax_value &= mask;
+       } else {
+               reg->umin_value = 0;
+               reg->umax_value = mask;
+       }
+       reg->smin_value = reg->umin_value;
+       reg->smax_value = reg->umax_value;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
        if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
            regs[value_regno].type == SCALAR_VALUE) {
                /* b/h/w load zero-extends, mark upper bits as known 0 */
-               regs[value_regno].var_off =
-                       tnum_cast(regs[value_regno].var_off, size);
-               __update_reg_bounds(&regs[value_regno]);
+               coerce_reg_to_size(&regs[value_regno], size);
        }
        return err;
 }
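
The new coerce_reg_to_size() helper does more than mask var_off: it also clamps the tracked unsigned bounds to the narrower width, and widens them to [0, mask] when the high bits of umin and umax disagree. The standalone program below reproduces just that bounds arithmetic on plain integers so the behaviour can be checked outside the verifier (a sketch, not the kernel function):

#include <stdint.h>
#include <stdio.h>

/* Clamp an unsigned range to `size` bytes, mirroring the mask logic above.
 * Only meaningful for size < 8, matching the helper's precondition.
 */
static void coerce_bounds(uint64_t *umin, uint64_t *umax, int size)
{
        uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

        if ((*umin & ~mask) == (*umax & ~mask)) {
                /* High bits agree: truncating keeps the range exact. */
                *umin &= mask;
                *umax &= mask;
        } else {
                /* High bits differ: the low bits can be anything. */
                *umin = 0;
                *umax = mask;
        }
}

int main(void)
{
        uint64_t lo = 0x100000010ULL, hi = 0x100000020ULL;

        coerce_bounds(&lo, &hi, 4);
        printf("exact:   [%#llx, %#llx]\n",
               (unsigned long long)lo, (unsigned long long)hi);  /* [0x10, 0x20] */

        lo = 0x0fffffff0ULL;
        hi = 0x100000010ULL;
        coerce_bounds(&lo, &hi, 4);
        printf("widened: [%#llx, %#llx]\n",
               (unsigned long long)lo, (unsigned long long)hi);  /* [0, 0xffffffff] */
        return 0;
}
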
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
                verbose(env, "invalid variable stack read R%d var_off=%s\n",
                        regno, tn_buf);
+               return -EACCES;
        }
        off = regs[regno].off + regs[regno].var_off.value;
        if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
                return -EINVAL;
        }
 
+       /* With LD_ABS/IND some JITs save/restore skb from r1. */
        changes_data = bpf_helper_changes_pkt_data(fn->func);
+       if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
+               verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
+                       func_id_name(func_id), func_id);
+               return -EINVAL;
+       }
 
        memset(&meta, 0, sizeof(meta));
        meta.pkt_access = fn->pkt_access;
@@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
        return 0;
 }
 
-static void coerce_reg_to_32(struct bpf_reg_state *reg)
-{
-       /* clear high 32 bits */
-       reg->var_off = tnum_cast(reg->var_off, 4);
-       /* Update bounds */
-       __update_reg_bounds(reg);
-}
-
 static bool signed_add_overflows(s64 a, s64 b)
 {
        /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
        return res > a;
 }
 
+static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+                                 const struct bpf_reg_state *reg,
+                                 enum bpf_reg_type type)
+{
+       bool known = tnum_is_const(reg->var_off);
+       s64 val = reg->var_off.value;
+       s64 smin = reg->smin_value;
+
+       if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+               verbose(env, "math between %s pointer and %lld is not allowed\n",
+                       reg_type_str[type], val);
+               return false;
+       }
+
+       if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "%s pointer offset %d is not allowed\n",
+                       reg_type_str[type], reg->off);
+               return false;
+       }
+
+       if (smin == S64_MIN) {
+               verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
+                       reg_type_str[type]);
+               return false;
+       }
+
+       if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "value %lld makes %s pointer be out of bounds\n",
+                       smin, reg_type_str[type]);
+               return false;
+       }
+
+       return true;
+}
+
 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
  * Caller should also handle BPF_MOV case separately.
  * If we return -EACCES, caller may want to try again treating pointer as a
@@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
        if (BPF_CLASS(insn->code) != BPF_ALU64) {
                /* 32-bit ALU ops on pointers produce (meaningless) scalars */
-               if (!env->allow_ptr_leaks)
-                       verbose(env,
-                               "R%d 32-bit pointer arithmetic prohibited\n",
-                               dst);
+               verbose(env,
+                       "R%d 32-bit pointer arithmetic prohibited\n",
+                       dst);
                return -EACCES;
        }
 
        if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == CONST_PTR_TO_MAP) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == PTR_TO_PACKET_END) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+                       dst);
                return -EACCES;
        }
 
@@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        dst_reg->type = ptr_reg->type;
        dst_reg->id = ptr_reg->id;
 
+       if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
+           !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
+               return -EINVAL;
+
        switch (opcode) {
        case BPF_ADD:
                /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_SUB:
                if (dst_reg == off_reg) {
                        /* scalar -= pointer.  Creates an unknown scalar */
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d tried to subtract pointer from scalar\n",
-                                       dst);
+                       verbose(env, "R%d tried to subtract pointer from scalar\n",
+                               dst);
                        return -EACCES;
                }
                /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                 * be able to deal with it.
                 */
                if (ptr_reg->type == PTR_TO_STACK) {
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d subtraction from stack pointer prohibited\n",
-                                       dst);
+                       verbose(env, "R%d subtraction from stack pointer prohibited\n",
+                               dst);
                        return -EACCES;
                }
                if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_AND:
        case BPF_OR:
        case BPF_XOR:
-               /* bitwise ops on pointers are troublesome, prohibit for now.
-                * (However, in principle we could allow some cases, e.g.
-                * ptr &= ~3 which would reduce min_value by 3.)
-                */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               /* bitwise ops on pointers are troublesome, prohibit. */
+               verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        default:
                /* other operators (e.g. MUL,LSH) produce non-pointer results */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        }
 
+       if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+               return -EINVAL;
+
        __update_reg_bounds(dst_reg);
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
 }
 
+/* WARNING: This function does calculations on 64-bit values, but the actual
+ * execution may occur on 32-bit values. Therefore, things like bitshifts
+ * need extra checks in the 32-bit case.
+ */
 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                                      struct bpf_insn *insn,
                                      struct bpf_reg_state *dst_reg,
@@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        bool src_known, dst_known;
        s64 smin_val, smax_val;
        u64 umin_val, umax_val;
+       u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 
-       if (BPF_CLASS(insn->code) != BPF_ALU64) {
-               /* 32-bit ALU ops are (32,32)->64 */
-               coerce_reg_to_32(dst_reg);
-               coerce_reg_to_32(&src_reg);
-       }
        smin_val = src_reg.smin_value;
        smax_val = src_reg.smax_value;
        umin_val = src_reg.umin_value;
@@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        src_known = tnum_is_const(src_reg.var_off);
        dst_known = tnum_is_const(dst_reg->var_off);
 
+       if (!src_known &&
+           opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+               __mark_reg_unknown(dst_reg);
+               return 0;
+       }
+
        switch (opcode) {
        case BPF_ADD:
                if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_LSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
@@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_RSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
                }
-               /* BPF_RSH is an unsigned shift, so make the appropriate casts */
-               if (dst_reg->smin_value < 0) {
-                       if (umin_val) {
-                               /* Sign bit will be cleared */
-                               dst_reg->smin_value = 0;
-                       } else {
-                               /* Lost sign bit information */
-                               dst_reg->smin_value = S64_MIN;
-                               dst_reg->smax_value = S64_MAX;
-                       }
-               } else {
-                       dst_reg->smin_value =
-                               (u64)(dst_reg->smin_value) >> umax_val;
-               }
+               /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+                * be negative, then either:
+                * 1) src_reg might be zero, so the sign bit of the result is
+                *    unknown, so we lose our signed bounds
+                * 2) it's known negative, thus the unsigned bounds capture the
+                *    signed bounds
+                * 3) the signed bounds cross zero, so they tell us nothing
+                *    about the result
+                * If the value in dst_reg is known nonnegative, then again the
+                * unsigned bounds capture the signed bounds.
+                * Thus, in all cases it suffices to blow away our signed bounds
+                * and rely on inferring new ones from the unsigned bounds and
+                * var_off of the result.
+                */
+               dst_reg->smin_value = S64_MIN;
+               dst_reg->smax_value = S64_MAX;
                if (src_known)
                        dst_reg->var_off = tnum_rshift(dst_reg->var_off,
                                                       umin_val);
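
Two things change in the shift cases: the shift amount is now checked against the real instruction width (32 or 64), since shifting by at least the width is undefined, and BPF_RSH simply discards the signed bounds and re-derives them from the unsigned bounds and var_off. The small program below demonstrates the width check; the helper is illustrative, not verifier code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Right-shift `val` of `bits` width by `shift`; refuse undefined shifts. */
static bool checked_rsh(uint64_t val, unsigned int bits, unsigned int shift,
                        uint64_t *out)
{
        if (shift >= bits)      /* mirrors: umax_val >= insn_bitness */
                return false;
        *out = (val & (bits == 64 ? ~0ULL : (1ULL << bits) - 1)) >> shift;
        return true;
}

int main(void)
{
        uint64_t r;

        printf("%d\n", checked_rsh(0x80000000ULL, 32, 31, &r)); /* 1, r == 1 */
        printf("%d\n", checked_rsh(0x80000000ULL, 32, 32, &r)); /* 0: undefined for 32-bit */
        return 0;
}
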
@@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                break;
        }
 
+       if (BPF_CLASS(insn->code) != BPF_ALU64) {
+               /* 32-bit ALU ops are (32,32)->32 */
+               coerce_reg_to_size(dst_reg, 4);
+               coerce_reg_to_size(&src_reg, 4);
+       }
+
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
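
Instead of coercing the operands to 32 bits before the operation, 32-bit ALU ops are now tracked in 64 bits and the destination (plus the local copy of the source) is truncated afterwards, so the tracked result matches a (32,32)->32 operation. A quick userspace check of that truncation and zero-extension behaviour:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t dst = 0xffffffffULL, src = 1;

        dst += src;             /* tracked in 64 bits: 0x100000000 */
        dst = (uint32_t)dst;    /* ALU32: truncate afterwards -> 0 */
        printf("%#llx\n", (unsigned long long)dst);

        /* A 32-bit move of a negative immediate is zero-extended, not sign-extended. */
        printf("%#llx\n", (unsigned long long)(uint32_t)-1);    /* 0xffffffff */
        return 0;
}
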
@@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
        struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
        struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
        u8 opcode = BPF_OP(insn->code);
-       int rc;
 
        dst_reg = &regs[insn->dst_reg];
        src_reg = NULL;
@@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                if (src_reg->type != SCALAR_VALUE) {
                        if (dst_reg->type != SCALAR_VALUE) {
                                /* Combining two pointers by any ALU op yields
-                                * an arbitrary scalar.
+                                * an arbitrary scalar. Disallow all math except
+                                * pointer subtraction
                                 */
-                               if (!env->allow_ptr_leaks) {
-                                       verbose(env, "R%d pointer %s pointer prohibited\n",
-                                               insn->dst_reg,
-                                               bpf_alu_string[opcode >> 4]);
-                                       return -EACCES;
+                               if (opcode == BPF_SUB) {
+                                       mark_reg_unknown(env, regs, insn->dst_reg);
+                                       return 0;
                                }
-                               mark_reg_unknown(env, regs, insn->dst_reg);
-                               return 0;
+                               verbose(env, "R%d pointer %s pointer prohibited\n",
+                                       insn->dst_reg,
+                                       bpf_alu_string[opcode >> 4]);
+                               return -EACCES;
                        } else {
                                /* scalar += pointer
                                 * This is legal, but we have to reverse our
                                 * src/dest handling in computing the range
                                 */
-                               rc = adjust_ptr_min_max_vals(env, insn,
-                                                            src_reg, dst_reg);
-                               if (rc == -EACCES && env->allow_ptr_leaks) {
-                                       /* scalar += unknown scalar */
-                                       __mark_reg_unknown(&off_reg);
-                                       return adjust_scalar_min_max_vals(
-                                                       env, insn,
-                                                       dst_reg, off_reg);
-                               }
-                               return rc;
+                               return adjust_ptr_min_max_vals(env, insn,
+                                                              src_reg, dst_reg);
                        }
                } else if (ptr_reg) {
                        /* pointer += scalar */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    dst_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += scalar */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, *src_reg);
-                       }
-                       return rc;
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      dst_reg, src_reg);
                }
        } else {
                /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                off_reg.type = SCALAR_VALUE;
                __mark_reg_known(&off_reg, insn->imm);
                src_reg = &off_reg;
-               if (ptr_reg) { /* pointer += K */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    ptr_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += K */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, off_reg);
-                       }
-                       return rc;
-               }
+               if (ptr_reg) /* pointer += K */
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      ptr_reg, src_reg);
        }
 
        /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                                        return -EACCES;
                                }
                                mark_reg_unknown(env, regs, insn->dst_reg);
-                               /* high 32 bits are known zero. */
-                               regs[insn->dst_reg].var_off = tnum_cast(
-                                               regs[insn->dst_reg].var_off, 4);
-                               __update_reg_bounds(&regs[insn->dst_reg]);
+                               coerce_reg_to_size(&regs[insn->dst_reg], 4);
                        }
                } else {
                        /* case: R = imm
                         * remember the value we stored into this reg
                         */
                        regs[insn->dst_reg].type = SCALAR_VALUE;
-                       __mark_reg_known(regs + insn->dst_reg, insn->imm);
+                       if (BPF_CLASS(insn->code) == BPF_ALU64) {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                insn->imm);
+                       } else {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                (u32)insn->imm);
+                       }
                }
 
        } else if (opcode > BPF_END) {
@@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
                        return range_within(rold, rcur) &&
                               tnum_in(rold->var_off, rcur->var_off);
                } else {
-                       /* if we knew anything about the old value, we're not
-                        * equal, because we can't know anything about the
-                        * scalar value of the pointer in the new value.
+                       /* We're trying to use a pointer in place of a scalar.
+                        * Even if the scalar was unbounded, this could lead to
+                        * pointer leaks because scalars are allowed to leak
+                        * while pointers are not. We could make this safe in
+                        * special cases if root is calling us, but it's
+                        * probably not worth the hassle.
                         */
-                       return rold->umin_value == 0 &&
-                              rold->umax_value == U64_MAX &&
-                              rold->smin_value == S64_MIN &&
-                              rold->smax_value == S64_MAX &&
-                              tnum_is_unknown(rold->var_off);
+                       return false;
                }
        case PTR_TO_MAP_VALUE:
                /* If the new min/max/var_off satisfy the old ones and
index 5f780d8f6a9d787ed22cf30bd8be66a0ae78f069..9caeda6102491db1a32c34bc62ef75da2cd3f6df 100644 (file)
@@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v)
 
        spin_lock_irq(&css_set_lock);
        rcu_read_lock();
-       cset = rcu_dereference(current->cgroups);
+       cset = task_css_set(current);
        refcnt = refcount_read(&cset->refcount);
        seq_printf(seq, "css_set %pK %d", cset, refcnt);
        if (refcnt > cset->nr_tasks)
@@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 
        spin_lock_irq(&css_set_lock);
        rcu_read_lock();
-       cset = rcu_dereference(current->cgroups);
+       cset = task_css_set(current);
        list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
                struct cgroup *c = link->cgrp;
 
index 133b465691d6fe82462c33215639b6b7f97f04b7..1e111dd455c49cd9f8e3e7619fb4f11eaffb3c71 100644 (file)
@@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp)
        }
 
        /* ->updated_children list is self terminated */
-       for_each_possible_cpu(cpu)
-               cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp;
+       for_each_possible_cpu(cpu) {
+               struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+
+               cstat->updated_children = cgrp;
+               u64_stats_init(&cstat->sync);
+       }
 
        prev_cputime_init(&cgrp->stat.prev_cputime);
 
index 6b4298a41167c7f3f7ea7be1d85af9a08d9d44cb..df0c91d5606c2fdd80ea7bb42a2deea3c83eec4d 100644 (file)
@@ -1755,3 +1755,11 @@ Efault:
        return -EFAULT;
 }
 #endif
+
+__weak void abort(void)
+{
+       BUG();
+
+       /* if that doesn't kill us, halt */
+       panic("Oops failed to kill thread");
+}
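
A generic abort() is added as a weak symbol: an architecture that already supplies its own strong abort() keeps it, while everything else falls back to BUG() and then panic(). A standalone illustration of the weak-symbol pattern with GCC/Clang (my_abort is a made-up name, not the kernel symbol):

#include <stdio.h>

/* Weak default: a strong definition in another object file would replace it. */
__attribute__((weak)) void my_abort(void)
{
        puts("weak default my_abort() called");
}

int main(void)
{
        my_abort();     /* resolves to the weak default, since no override is linked */
        return 0;
}
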
index 432eadf6b58c18d9de6a3d09f3fef36089b4b5a2..2295fc69717f6c3d877ef3cac15b55336d7746c6 100644 (file)
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        goto out;
        }
        /* a new mm has just been created */
-       arch_dup_mmap(oldmm, mm);
-       retval = 0;
+       retval = arch_dup_mmap(oldmm, mm);
 out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
index e357bc800111043ed8d477d758bd245b8aa4c566..daae2f2dc6d4f64565112f0d7fea9c984887ce26 100644 (file)
@@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const void *_b)
        return gid_gt(a, b) - gid_lt(a, b);
 }
 
-static void groups_sort(struct group_info *group_info)
+void groups_sort(struct group_info *group_info)
 {
        sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
             gid_cmp, NULL);
 }
+EXPORT_SYMBOL(groups_sort);
 
 /* a simple bsearch */
 int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -122,7 +123,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
 void set_groups(struct cred *new, struct group_info *group_info)
 {
        put_group_info(new->group_info);
-       groups_sort(group_info);
        get_group_info(group_info);
        new->group_info = group_info;
 }
@@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
                return retval;
        }
 
+       groups_sort(group_info);
        retval = set_current_groups(group_info);
        put_group_info(group_info);
 
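
groups_sort() is made non-static, exported, and moved out of set_groups() to the callers that build a group_info (here the setgroups() path): the list must already be ordered before it is installed, because groups_search() does a binary search over it. A userspace sketch of that sort-before-search contract (plain qsort plus a hand-rolled binary search, not the kernel helpers):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static int gid_cmp(const void *a, const void *b)
{
        unsigned int x = *(const unsigned int *)a, y = *(const unsigned int *)b;

        return (x > y) - (x < y);
}

/* Only correct if gids[] was sorted first. */
static bool gid_search(const unsigned int *gids, size_t n, unsigned int gid)
{
        size_t lo = 0, hi = n;

        while (lo < hi) {
                size_t mid = lo + (hi - lo) / 2;

                if (gids[mid] == gid)
                        return true;
                if (gids[mid] < gid)
                        lo = mid + 1;
                else
                        hi = mid;
        }
        return false;
}

int main(void)
{
        unsigned int gids[] = { 1000, 4, 27, 100 };
        size_t n = sizeof(gids) / sizeof(gids[0]);

        qsort(gids, n, sizeof(gids[0]), gid_cmp);       /* the groups_sort() step */
        printf("%d %d\n", gid_search(gids, n, 27), gid_search(gids, n, 5));     /* 1 0 */
        return 0;
}
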
index 15f33faf4013bdfea16baf8b0b31053456606620..7594c033d98a39f3f51632a8f6f77b4440079ec4 100644 (file)
@@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2)
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2);
 
-void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2)
+void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
 {
        write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_);
 }
@@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2)
 }
 EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2);
 
-void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2)
+void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
 {
        write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2,
                        _RET_IP_);
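
The cmp4 hooks previously declared u16 parameters, so the upper 16 bits of 32-bit comparison operands were silently dropped before being recorded. A tiny demonstration of the information lost by such narrowing (generic C, unrelated to the kcov API itself):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t a = 0x00011234, b = 0x00021234;

        /* As u16 both operands collapse to 0x1234 and look identical. */
        printf("u16: %#x vs %#x\n", (uint16_t)a, (uint16_t)b);
        printf("u32: %#x vs %#x\n", a, b);
        return 0;
}
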
index 670d8d7d8087ccabb8d9fe2c8397134d1559bcd8..5fa1324a4f29a57901bdcf0ca81874a3dfd9a66c 100644 (file)
 #define CREATE_TRACE_POINTS
 #include <trace/events/lock.h>
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-#include <linux/slab.h>
-#endif
-
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -75,19 +71,6 @@ module_param(lock_stat, int, 0644);
 #define lock_stat 0
 #endif
 
-#ifdef CONFIG_BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
-static int crossrelease_fullstack = 1;
-#else
-static int crossrelease_fullstack;
-#endif
-static int __init allow_crossrelease_fullstack(char *str)
-{
-       crossrelease_fullstack = 1;
-       return 0;
-}
-
-early_param("crossrelease_fullstack", allow_crossrelease_fullstack);
-
 /*
  * lockdep_lock: protects the lockdep graph, the hashes and the
  *               class/list/hash allocators.
@@ -740,18 +723,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
        return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
 }
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-static void cross_init(struct lockdep_map *lock, int cross);
-static int cross_lock(struct lockdep_map *lock);
-static int lock_acquire_crosslock(struct held_lock *hlock);
-static int lock_release_crosslock(struct lockdep_map *lock);
-#else
-static inline void cross_init(struct lockdep_map *lock, int cross) {}
-static inline int cross_lock(struct lockdep_map *lock) { return 0; }
-static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
-static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
-#endif
-
 /*
  * Register a lock's class in the hash-table, if the class is not present
  * yet. Otherwise we look it up. We cache the result in the lock object
@@ -1151,41 +1122,22 @@ print_circular_lock_scenario(struct held_lock *src,
                printk(KERN_CONT "\n\n");
        }
 
-       if (cross_lock(tgt->instance)) {
-               printk(" Possible unsafe locking scenario by crosslock:\n\n");
-               printk("       CPU0                    CPU1\n");
-               printk("       ----                    ----\n");
-               printk("  lock(");
-               __print_lock_name(parent);
-               printk(KERN_CONT ");\n");
-               printk("  lock(");
-               __print_lock_name(target);
-               printk(KERN_CONT ");\n");
-               printk("                               lock(");
-               __print_lock_name(source);
-               printk(KERN_CONT ");\n");
-               printk("                               unlock(");
-               __print_lock_name(target);
-               printk(KERN_CONT ");\n");
-               printk("\n *** DEADLOCK ***\n\n");
-       } else {
-               printk(" Possible unsafe locking scenario:\n\n");
-               printk("       CPU0                    CPU1\n");
-               printk("       ----                    ----\n");
-               printk("  lock(");
-               __print_lock_name(target);
-               printk(KERN_CONT ");\n");
-               printk("                               lock(");
-               __print_lock_name(parent);
-               printk(KERN_CONT ");\n");
-               printk("                               lock(");
-               __print_lock_name(target);
-               printk(KERN_CONT ");\n");
-               printk("  lock(");
-               __print_lock_name(source);
-               printk(KERN_CONT ");\n");
-               printk("\n *** DEADLOCK ***\n\n");
-       }
+       printk(" Possible unsafe locking scenario:\n\n");
+       printk("       CPU0                    CPU1\n");
+       printk("       ----                    ----\n");
+       printk("  lock(");
+       __print_lock_name(target);
+       printk(KERN_CONT ");\n");
+       printk("                               lock(");
+       __print_lock_name(parent);
+       printk(KERN_CONT ");\n");
+       printk("                               lock(");
+       __print_lock_name(target);
+       printk(KERN_CONT ");\n");
+       printk("  lock(");
+       __print_lock_name(source);
+       printk(KERN_CONT ");\n");
+       printk("\n *** DEADLOCK ***\n\n");
 }
 
 /*
@@ -1211,10 +1163,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
                curr->comm, task_pid_nr(curr));
        print_lock(check_src);
 
-       if (cross_lock(check_tgt->instance))
-               pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
-       else
-               pr_warn("\nbut task is already holding lock:\n");
+       pr_warn("\nbut task is already holding lock:\n");
 
        print_lock(check_tgt);
        pr_warn("\nwhich lock already depends on the new lock.\n\n");
@@ -1244,9 +1193,7 @@ static noinline int print_circular_bug(struct lock_list *this,
        if (!debug_locks_off_graph_unlock() || debug_locks_silent)
                return 0;
 
-       if (cross_lock(check_tgt->instance))
-               this->trace = *trace;
-       else if (!save_trace(&this->trace))
+       if (!save_trace(&this->trace))
                return 0;
 
        depth = get_lock_depth(target);
@@ -1850,9 +1797,6 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
                if (nest)
                        return 2;
 
-               if (cross_lock(prev->instance))
-                       continue;
-
                return print_deadlock_bug(curr, prev, next);
        }
        return 1;
@@ -2018,31 +1962,26 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
        for (;;) {
                int distance = curr->lockdep_depth - depth + 1;
                hlock = curr->held_locks + depth - 1;
+
                /*
-                * Only non-crosslock entries get new dependencies added.
-                * Crosslock entries will be added by commit later:
+                * Only non-recursive-read entries get new dependencies
+                * added:
                 */
-               if (!cross_lock(hlock->instance)) {
+               if (hlock->read != 2 && hlock->check) {
+                       int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace);
+                       if (!ret)
+                               return 0;
+
                        /*
-                        * Only non-recursive-read entries get new dependencies
-                        * added:
+                        * Stop after the first non-trylock entry,
+                        * as non-trylock entries have added their
+                        * own direct dependencies already, so this
+                        * lock is connected to them indirectly:
                         */
-                       if (hlock->read != 2 && hlock->check) {
-                               int ret = check_prev_add(curr, hlock, next,
-                                                        distance, &trace, save_trace);
-                               if (!ret)
-                                       return 0;
-
-                               /*
-                                * Stop after the first non-trylock entry,
-                                * as non-trylock entries have added their
-                                * own direct dependencies already, so this
-                                * lock is connected to them indirectly:
-                                */
-                               if (!hlock->trylock)
-                                       break;
-                       }
+                       if (!hlock->trylock)
+                               break;
                }
+
                depth--;
                /*
                 * End of lock-stack?
@@ -3292,21 +3231,10 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
 void lockdep_init_map(struct lockdep_map *lock, const char *name,
                      struct lock_class_key *key, int subclass)
 {
-       cross_init(lock, 0);
        __lockdep_init_map(lock, name, key, subclass);
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
-                     struct lock_class_key *key, int subclass)
-{
-       cross_init(lock, 1);
-       __lockdep_init_map(lock, name, key, subclass);
-}
-EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
-#endif
-
 struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
@@ -3362,7 +3290,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        int chain_head = 0;
        int class_idx;
        u64 chain_key;
-       int ret;
 
        if (unlikely(!debug_locks))
                return 0;
@@ -3411,8 +3338,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 
        class_idx = class - lock_classes + 1;
 
-       /* TODO: nest_lock is not implemented for crosslock yet. */
-       if (depth && !cross_lock(lock)) {
+       if (depth) {
                hlock = curr->held_locks + depth - 1;
                if (hlock->class_idx == class_idx && nest_lock) {
                        if (hlock->references) {
@@ -3500,14 +3426,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
                return 0;
 
-       ret = lock_acquire_crosslock(hlock);
-       /*
-        * 2 means normal acquire operations are needed. Otherwise, it's
-        * ok just to return with '0:fail, 1:success'.
-        */
-       if (ret != 2)
-               return ret;
-
        curr->curr_chain_key = chain_key;
        curr->lockdep_depth++;
        check_chain_key(curr);
@@ -3745,19 +3663,11 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
        struct task_struct *curr = current;
        struct held_lock *hlock;
        unsigned int depth;
-       int ret, i;
+       int i;
 
        if (unlikely(!debug_locks))
                return 0;
 
-       ret = lock_release_crosslock(lock);
-       /*
-        * 2 means normal release operations are needed. Otherwise, it's
-        * ok just to return with '0:fail, 1:success'.
-        */
-       if (ret != 2)
-               return ret;
-
        depth = curr->lockdep_depth;
        /*
         * So we're all set to release this lock.. wait what lock? We don't
@@ -4675,495 +4585,3 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
        dump_stack();
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
-
-#ifdef CONFIG_LOCKDEP_CROSSRELEASE
-
-/*
- * Crossrelease works by recording a lock history for each thread and
- * connecting those historic locks that were taken after the
- * wait_for_completion() in the complete() context.
- *
- * Task-A                              Task-B
- *
- *                                     mutex_lock(&A);
- *                                     mutex_unlock(&A);
- *
- * wait_for_completion(&C);
- *   lock_acquire_crosslock();
- *     atomic_inc_return(&cross_gen_id);
- *                                |
- *                               |     mutex_lock(&B);
- *                               |     mutex_unlock(&B);
- *                                |
- *                               |     complete(&C);
- *                               `--     lock_commit_crosslock();
- *
- * Which will then add a dependency between B and C.
- */
-
-#define xhlock(i)         (current->xhlocks[(i) % MAX_XHLOCKS_NR])
-
-/*
- * Whenever a crosslock is held, cross_gen_id will be increased.
- */
-static atomic_t cross_gen_id; /* Can be wrapped */
-
-/*
- * Make an entry of the ring buffer invalid.
- */
-static inline void invalidate_xhlock(struct hist_lock *xhlock)
-{
-       /*
-        * Normally, xhlock->hlock.instance must be !NULL.
-        */
-       xhlock->hlock.instance = NULL;
-}
-
-/*
- * Lock history stacks; we have 2 nested lock history stacks:
- *
- *   HARD(IRQ)
- *   SOFT(IRQ)
- *
- * The thing is that once we complete a HARD/SOFT IRQ the future task locks
- * should not depend on any of the locks observed while running the IRQ.  So
- * what we do is rewind the history buffer and erase all our knowledge of that
- * temporal event.
- */
-
-void crossrelease_hist_start(enum xhlock_context_t c)
-{
-       struct task_struct *cur = current;
-
-       if (!cur->xhlocks)
-               return;
-
-       cur->xhlock_idx_hist[c] = cur->xhlock_idx;
-       cur->hist_id_save[c]    = cur->hist_id;
-}
-
-void crossrelease_hist_end(enum xhlock_context_t c)
-{
-       struct task_struct *cur = current;
-
-       if (cur->xhlocks) {
-               unsigned int idx = cur->xhlock_idx_hist[c];
-               struct hist_lock *h = &xhlock(idx);
-
-               cur->xhlock_idx = idx;
-
-               /* Check if the ring was overwritten. */
-               if (h->hist_id != cur->hist_id_save[c])
-                       invalidate_xhlock(h);
-       }
-}
-
-/*
- * lockdep_invariant_state() is used to annotate independence inside a task, to
- * make one task look like multiple independent 'tasks'.
- *
- * Take for instance workqueues; each work is independent of the last. The
- * completion of a future work does not depend on the completion of a past work
- * (in general). Therefore we must not carry that (lock) dependency across
- * works.
- *
- * This is true for many things; pretty much all kthreads fall into this
- * pattern, where they have an invariant state and future completions do not
- * depend on past completions. Its just that since they all have the 'same'
- * form -- the kthread does the same over and over -- it doesn't typically
- * matter.
- *
- * The same is true for system-calls, once a system call is completed (we've
- * returned to userspace) the next system call does not depend on the lock
- * history of the previous system call.
- *
- * They key property for independence, this invariant state, is that it must be
- * a point where we hold no locks and have no history. Because if we were to
- * hold locks, the restore at _end() would not necessarily recover it's history
- * entry. Similarly, independence per-definition means it does not depend on
- * prior state.
- */
-void lockdep_invariant_state(bool force)
-{
-       /*
-        * We call this at an invariant point, no current state, no history.
-        * Verify the former, enforce the latter.
-        */
-       WARN_ON_ONCE(!force && current->lockdep_depth);
-       if (current->xhlocks)
-               invalidate_xhlock(&xhlock(current->xhlock_idx));
-}
-
-static int cross_lock(struct lockdep_map *lock)
-{
-       return lock ? lock->cross : 0;
-}
-
-/*
- * This is needed to decide the relationship between wrapable variables.
- */
-static inline int before(unsigned int a, unsigned int b)
-{
-       return (int)(a - b) < 0;
-}
-
-static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
-{
-       return hlock_class(&xhlock->hlock);
-}
-
-static inline struct lock_class *xlock_class(struct cross_lock *xlock)
-{
-       return hlock_class(&xlock->hlock);
-}
-
-/*
- * Should we check a dependency with previous one?
- */
-static inline int depend_before(struct held_lock *hlock)
-{
-       return hlock->read != 2 && hlock->check && !hlock->trylock;
-}
-
-/*
- * Should we check a dependency with next one?
- */
-static inline int depend_after(struct held_lock *hlock)
-{
-       return hlock->read != 2 && hlock->check;
-}
-
-/*
- * Check if the xhlock is valid, which would be false if,
- *
- *    1. Has not used after initializaion yet.
- *    2. Got invalidated.
- *
- * Remind hist_lock is implemented as a ring buffer.
- */
-static inline int xhlock_valid(struct hist_lock *xhlock)
-{
-       /*
-        * xhlock->hlock.instance must be !NULL.
-        */
-       return !!xhlock->hlock.instance;
-}
-
-/*
- * Record a hist_lock entry.
- *
- * Irq disable is only required.
- */
-static void add_xhlock(struct held_lock *hlock)
-{
-       unsigned int idx = ++current->xhlock_idx;
-       struct hist_lock *xhlock = &xhlock(idx);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
-       /*
-        * This can be done locklessly because they are all task-local
-        * state, we must however ensure IRQs are disabled.
-        */
-       WARN_ON_ONCE(!irqs_disabled());
-#endif
-
-       /* Initialize hist_lock's members */
-       xhlock->hlock = *hlock;
-       xhlock->hist_id = ++current->hist_id;
-
-       xhlock->trace.nr_entries = 0;
-       xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
-       xhlock->trace.entries = xhlock->trace_entries;
-
-       if (crossrelease_fullstack) {
-               xhlock->trace.skip = 3;
-               save_stack_trace(&xhlock->trace);
-       } else {
-               xhlock->trace.nr_entries = 1;
-               xhlock->trace.entries[0] = hlock->acquire_ip;
-       }
-}
-
-static inline int same_context_xhlock(struct hist_lock *xhlock)
-{
-       return xhlock->hlock.irq_context == task_irq_context(current);
-}
-
-/*
- * This should be lockless as far as possible because this would be
- * called very frequently.
- */
-static void check_add_xhlock(struct held_lock *hlock)
-{
-       /*
-        * Record a hist_lock, only in case that acquisitions ahead
-        * could depend on the held_lock. For example, if the held_lock
-        * is trylock then acquisitions ahead never depends on that.
-        * In that case, we don't need to record it. Just return.
-        */
-       if (!current->xhlocks || !depend_before(hlock))
-               return;
-
-       add_xhlock(hlock);
-}
-
-/*
- * For crosslock.
- */
-static int add_xlock(struct held_lock *hlock)
-{
-       struct cross_lock *xlock;
-       unsigned int gen_id;
-
-       if (!graph_lock())
-               return 0;
-
-       xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
-
-       /*
-        * When acquisitions for a crosslock are overlapped, we use
-        * nr_acquire to perform commit for them, based on cross_gen_id
-        * of the first acquisition, which allows to add additional
-        * dependencies.
-        *
-        * Moreover, when no acquisition of a crosslock is in progress,
-        * we should not perform commit because the lock might not exist
-        * any more, which might cause incorrect memory access. So we
-        * have to track the number of acquisitions of a crosslock.
-        *
-        * depend_after() is necessary to initialize only the first
-        * valid xlock so that the xlock can be used on its commit.
-        */
-       if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
-               goto unlock;
-
-       gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
-       xlock->hlock = *hlock;
-       xlock->hlock.gen_id = gen_id;
-unlock:
-       graph_unlock();
-       return 1;
-}
-
-/*
- * Called for both normal and crosslock acquires. Normal locks will be
- * pushed on the hist_lock queue. Cross locks will record state and
- * stop regular lock_acquire() to avoid being placed on the held_lock
- * stack.
- *
- * Return: 0 - failure;
- *         1 - crosslock, done;
- *         2 - normal lock, continue to held_lock[] ops.
- */
-static int lock_acquire_crosslock(struct held_lock *hlock)
-{
-       /*
-        *      CONTEXT 1               CONTEXT 2
-        *      ---------               ---------
-        *      lock A (cross)
-        *      X = atomic_inc_return(&cross_gen_id)
-        *      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-        *                              Y = atomic_read_acquire(&cross_gen_id)
-        *                              lock B
-        *
-        * atomic_read_acquire() is for ordering between A and B,
-        * IOW, A happens before B, when CONTEXT 2 see Y >= X.
-        *
-        * Pairs with atomic_inc_return() in add_xlock().
-        */
-       hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
-
-       if (cross_lock(hlock->instance))
-               return add_xlock(hlock);
-
-       check_add_xhlock(hlock);
-       return 2;
-}
-
-static int copy_trace(struct stack_trace *trace)
-{
-       unsigned long *buf = stack_trace + nr_stack_trace_entries;
-       unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
-       unsigned int nr = min(max_nr, trace->nr_entries);
-
-       trace->nr_entries = nr;
-       memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
-       trace->entries = buf;
-       nr_stack_trace_entries += nr;
-
-       if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
-               if (!debug_locks_off_graph_unlock())
-                       return 0;
-
-               print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
-               dump_stack();
-
-               return 0;
-       }
-
-       return 1;
-}
-
-static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
-{
-       unsigned int xid, pid;
-       u64 chain_key;
-
-       xid = xlock_class(xlock) - lock_classes;
-       chain_key = iterate_chain_key((u64)0, xid);
-       pid = xhlock_class(xhlock) - lock_classes;
-       chain_key = iterate_chain_key(chain_key, pid);
-
-       if (lookup_chain_cache(chain_key))
-               return 1;
-
-       if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
-                               chain_key))
-               return 0;
-
-       if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
-                           &xhlock->trace, copy_trace))
-               return 0;
-
-       return 1;
-}
-
-static void commit_xhlocks(struct cross_lock *xlock)
-{
-       unsigned int cur = current->xhlock_idx;
-       unsigned int prev_hist_id = xhlock(cur).hist_id;
-       unsigned int i;
-
-       if (!graph_lock())
-               return;
-
-       if (xlock->nr_acquire) {
-               for (i = 0; i < MAX_XHLOCKS_NR; i++) {
-                       struct hist_lock *xhlock = &xhlock(cur - i);
-
-                       if (!xhlock_valid(xhlock))
-                               break;
-
-                       if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
-                               break;
-
-                       if (!same_context_xhlock(xhlock))
-                               break;
-
-                       /*
-                        * Filter out the cases where the ring buffer was
-                        * overwritten and the current entry has a bigger
-                        * hist_id than the previous one, which is impossible
-                        * otherwise:
-                        */
-                       if (unlikely(before(prev_hist_id, xhlock->hist_id)))
-                               break;
-
-                       prev_hist_id = xhlock->hist_id;
-
-                       /*
-                        * commit_xhlock() returns 0 with graph_lock already
-                        * released if fail.
-                        */
-                       if (!commit_xhlock(xlock, xhlock))
-                               return;
-               }
-       }
-
-       graph_unlock();
-}
-
-void lock_commit_crosslock(struct lockdep_map *lock)
-{
-       struct cross_lock *xlock;
-       unsigned long flags;
-
-       if (unlikely(!debug_locks || current->lockdep_recursion))
-               return;
-
-       if (!current->xhlocks)
-               return;
-
-       /*
-        * Do commit hist_locks with the cross_lock, only in case that
-        * the cross_lock could depend on acquisitions after that.
-        *
-        * For example, if the cross_lock does not have the 'check' flag
-        * then we don't need to check dependencies and commit for that.
-        * Just skip it. In that case, of course, the cross_lock does
-        * not depend on acquisitions ahead, either.
-        *
-        * WARNING: Don't do that in add_xlock() in advance. When an
-        * acquisition context is different from the commit context,
-        * invalid(skipped) cross_lock might be accessed.
-        */
-       if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
-               return;
-
-       raw_local_irq_save(flags);
-       check_flags(flags);
-       current->lockdep_recursion = 1;
-       xlock = &((struct lockdep_map_cross *)lock)->xlock;
-       commit_xhlocks(xlock);
-       current->lockdep_recursion = 0;
-       raw_local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(lock_commit_crosslock);
-
-/*
- * Return: 0 - failure;
- *         1 - crosslock, done;
- *         2 - normal lock, continue to held_lock[] ops.
- */
-static int lock_release_crosslock(struct lockdep_map *lock)
-{
-       if (cross_lock(lock)) {
-               if (!graph_lock())
-                       return 0;
-               ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
-               graph_unlock();
-               return 1;
-       }
-       return 2;
-}
-
-static void cross_init(struct lockdep_map *lock, int cross)
-{
-       if (cross)
-               ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
-
-       lock->cross = cross;
-
-       /*
-        * Crossrelease assumes that the ring buffer size of xhlocks
-        * is aligned with power of 2. So force it on build.
-        */
-       BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
-}
-
-void lockdep_init_task(struct task_struct *task)
-{
-       int i;
-
-       task->xhlock_idx = UINT_MAX;
-       task->hist_id = 0;
-
-       for (i = 0; i < XHLOCK_CTX_NR; i++) {
-               task->xhlock_idx_hist[i] = UINT_MAX;
-               task->hist_id_save[i] = 0;
-       }
-
-       task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
-                               GFP_KERNEL);
-}
-
-void lockdep_free_task(struct task_struct *task)
-{
-       if (task->xhlocks) {
-               void *tmp = task->xhlocks;
-               /* Diable crossrelease for current */
-               task->xhlocks = NULL;
-               kfree(tmp);
-       }
-}
-#endif
index 1fd1a7543cdddf39197acaa3882a0f9de7ddb3ab..936f3d14dd6bfeda3ef7921266fef5dc5b36e2a8 100644 (file)
@@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock)                        \
                        break;                                          \
                preempt_enable();                                       \
                                                                        \
-               if (!(lock)->break_lock)                                \
-                       (lock)->break_lock = 1;                         \
-               while ((lock)->break_lock)                              \
-                       arch_##op##_relax(&lock->raw_lock);             \
+               arch_##op##_relax(&lock->raw_lock);                     \
        }                                                               \
-       (lock)->break_lock = 0;                                         \
 }                                                                      \
                                                                        \
 unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
@@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock)       \
                local_irq_restore(flags);                               \
                preempt_enable();                                       \
                                                                        \
-               if (!(lock)->break_lock)                                \
-                       (lock)->break_lock = 1;                         \
-               while ((lock)->break_lock)                              \
-                       arch_##op##_relax(&lock->raw_lock);             \
+               arch_##op##_relax(&lock->raw_lock);                     \
        }                                                               \
-       (lock)->break_lock = 0;                                         \
+                                                                       \
        return flags;                                                   \
 }                                                                      \
                                                                        \
index 75554f366fd3aa20339a00a03d7897cf42bc7d97..644fa2e3d993b5ef1bd1c57e4286168daadf14ff 100644 (file)
@@ -5097,17 +5097,6 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
        return ret;
 }
 
-/**
- * sys_sched_rr_get_interval - return the default timeslice of a process.
- * @pid: pid of the process.
- * @interval: userspace pointer to the timeslice value.
- *
- * this syscall writes the default timeslice value of a given process
- * into the user-space timespec buffer. A value of '0' means infinity.
- *
- * Return: On success, 0 and the timeslice is in @interval. Otherwise,
- * an error code.
- */
 static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
 {
        struct task_struct *p;
@@ -5144,6 +5133,17 @@ out_unlock:
        return retval;
 }
 
+/**
+ * sys_sched_rr_get_interval - return the default timeslice of a process.
+ * @pid: pid of the process.
+ * @interval: userspace pointer to the timeslice value.
+ *
+ * this syscall writes the default timeslice value of a given process
+ * into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
+ */
 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
                struct timespec __user *, interval)
 {
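
The kernel-doc block moved above spells out the syscall's user-visible contract: the default timeslice of the given pid is written to the user buffer, and 0 means an infinite slice. A minimal user-space check of that contract, for illustration only (glibc exposes the call directly; pid 0 means the caller):

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        /* pid 0 = calling process; 0/0 back means "infinite" slice,
         * i.e. the task is not SCHED_RR. */
        if (sched_rr_get_interval(0, &ts)) {
                perror("sched_rr_get_interval");
                return 1;
        }
        printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}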
index 4056c19ca3f00efbc7592a1b4b071426fabf2124..665ace2fc55885e0a4c0621e4bdbff2a5870b61f 100644 (file)
@@ -2034,8 +2034,9 @@ static void pull_rt_task(struct rq *this_rq)
        bool resched = false;
        struct task_struct *p;
        struct rq *src_rq;
+       int rt_overload_count = rt_overloaded(this_rq);
 
-       if (likely(!rt_overloaded(this_rq)))
+       if (likely(!rt_overload_count))
                return;
 
        /*
@@ -2044,6 +2045,11 @@ static void pull_rt_task(struct rq *this_rq)
         */
        smp_rmb();
 
+       /* If we are the only overloaded CPU do nothing */
+       if (rt_overload_count == 1 &&
+           cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
+               return;
+
 #ifdef HAVE_RT_PUSH_IPI
        if (sched_feat(RT_PUSH_IPI)) {
                tell_cpu_to_push(this_rq);
index 13d6881f908b7f91a5669264a060f59d7990f801..ec999f32c84058a0624d55ff3ee26a4fac63eb57 100644 (file)
@@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event)
 {
        struct task_struct *rtn = current->group_leader;
 
-       if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-               (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
-                !same_thread_group(rtn, current) ||
-                (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+       switch (event->sigev_notify) {
+       case SIGEV_SIGNAL | SIGEV_THREAD_ID:
+               rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+               if (!rtn || !same_thread_group(rtn, current))
+                       return NULL;
+               /* FALLTHRU */
+       case SIGEV_SIGNAL:
+       case SIGEV_THREAD:
+               if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
+                       return NULL;
+               /* FALLTHRU */
+       case SIGEV_NONE:
+               return task_pid(rtn);
+       default:
                return NULL;
-
-       if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
-           ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
-               return NULL;
-
-       return task_pid(rtn);
+       }
 }
 
 static struct k_itimer * alloc_posix_timer(void)
@@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
        struct timespec64 ts64;
        bool sig_none;
 
-       sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE;
+       sig_none = timr->it_sigev_notify == SIGEV_NONE;
        iv = timr->it_interval;
 
        /* interval timer ? */
@@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags,
 
        timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
        expires = timespec64_to_ktime(new_setting->it_value);
-       sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE;
+       sigev_none = timr->it_sigev_notify == SIGEV_NONE;
 
        kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
        timr->it_active = !sigev_none;
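
For reference, a minimal user-space sketch of the path the reworked good_sigevent() above now validates: SIGEV_SIGNAL | SIGEV_THREAD_ID must name a thread in the caller's own thread group and carry a signal number in the range 0 < signo <= SIGRTMAX. The sigev_notify_thread_id fallback define, the _GNU_SOURCE requirement, and linking with -lrt on older glibc are assumptions about the build environment, not part of the patch:

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef sigev_notify_thread_id
#define sigev_notify_thread_id _sigev_un._tid   /* glibc keeps the tid in a union */
#endif

int main(void)
{
        struct sigevent sev = { 0 };
        timer_t timerid;

        sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID; /* first case of the new switch */
        sev.sigev_signo  = SIGRTMIN;                        /* must be 0 < signo <= SIGRTMAX */
        sev.sigev_notify_thread_id = syscall(SYS_gettid);   /* must be in our own thread group */

        if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) {
                perror("timer_create");
                return 1;
        }
        puts("timer created");
        timer_delete(timerid);
        return 0;
}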
index af7dad126c13cecbe73f5d797778f005b5838377..904c952ac3833bdfd0e017dff437d26f4c545e3d 100644 (file)
@@ -164,6 +164,7 @@ config PREEMPTIRQ_EVENTS
        bool "Enable trace events for preempt and irq disable/enable"
        select TRACE_IRQFLAGS
        depends on DEBUG_PREEMPT || !PROVE_LOCKING
+       depends on TRACING
        default n
        help
          Enable tracing of disable and enable events for preemption and irqs.
index 0ce99c379c3089a4857d082b64ede99feeea5282..40207c2a41134851d5016fe9cbdc678e42606868 100644 (file)
@@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
        .arg4_type      = ARG_CONST_SIZE,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
 
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
-                       u64 flags, struct perf_raw_record *raw)
+                       u64 flags, struct perf_sample_data *sd)
 {
        struct bpf_array *array = container_of(map, struct bpf_array, map);
-       struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
        unsigned int cpu = smp_processor_id();
        u64 index = flags & BPF_F_INDEX_MASK;
        struct bpf_event_entry *ee;
@@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
        if (unlikely(event->oncpu != cpu))
                return -EOPNOTSUPP;
 
-       perf_sample_data_init(sd, 0, 0);
-       sd->raw = raw;
        perf_event_output(event, sd, regs);
        return 0;
 }
@@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
           u64, flags, void *, data, u64, size)
 {
+       struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
        struct perf_raw_record raw = {
                .frag = {
                        .size = size,
@@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
        if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
                return -EINVAL;
 
-       return __bpf_perf_event_output(regs, map, flags, &raw);
+       perf_sample_data_init(sd, 0, 0);
+       sd->raw = &raw;
+
+       return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 };
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
+       struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
        struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
        struct perf_raw_frag frag = {
                .copy           = ctx_copy,
@@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
        };
 
        perf_fetch_caller_regs(regs);
+       perf_sample_data_init(sd, 0, 0);
+       sd->raw = &raw;
 
-       return __bpf_perf_event_output(regs, map, flags, &raw);
+       return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 BPF_CALL_0(bpf_get_current_task)
index 91874a95060de5de11aa47d3fbddb8c4980a0da8..c87766c1c20446de2d191edda885669d447adecc 100644 (file)
@@ -1799,12 +1799,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
 
-static __always_inline void *
-__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
-{
-       return bpage->data + index;
-}
-
 static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
 {
        return bpage->page->data + index;
index 73e67b68c53b47d5b422970cd0dee1d0bec27002..59518b8126d04b4f1f62a526571490dda8398e3b 100644 (file)
@@ -362,7 +362,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
 }
 
 /**
- * trace_pid_filter_add_remove - Add or remove a task from a pid_list
+ * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
  * @pid_list: The list to modify
  * @self: The current task for fork or NULL for exit
  * @task: The task to add or remove
@@ -925,7 +925,7 @@ static void tracing_snapshot_instance(struct trace_array *tr)
 }
 
 /**
- * trace_snapshot - take a snapshot of the current buffer.
+ * tracing_snapshot - take a snapshot of the current buffer.
  *
  * This causes a swap between the snapshot buffer and the current live
  * tracing buffer. You can use this to take snapshots of the live
@@ -1004,9 +1004,9 @@ int tracing_alloc_snapshot(void)
 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
 
 /**
- * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
+ * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
  *
- * This is similar to trace_snapshot(), but it will allocate the
+ * This is similar to tracing_snapshot(), but it will allocate the
  * snapshot buffer if it isn't already allocated. Use this only
  * where it is safe to sleep, as the allocation may sleep.
  *
@@ -1303,7 +1303,7 @@ unsigned long __read_mostly       tracing_thresh;
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
+ * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
  */
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2415,7 +2415,7 @@ trace_process_export(struct trace_export *export,
 
        entry = ring_buffer_event_data(event);
        size = ring_buffer_event_length(event);
-       export->write(entry, size);
+       export->write(export, entry, size);
 }
 
 static DEFINE_MUTEX(ftrace_export_lock);
@@ -4178,37 +4178,30 @@ static const struct file_operations show_traces_fops = {
        .llseek         = seq_lseek,
 };
 
-/*
- * The tracer itself will not take this lock, but still we want
- * to provide a consistent cpumask to user-space:
- */
-static DEFINE_MUTEX(tracing_cpumask_update_lock);
-
-/*
- * Temporary storage for the character representation of the
- * CPU bitmask (and one more byte for the newline):
- */
-static char mask_str[NR_CPUS + 1];
-
 static ssize_t
 tracing_cpumask_read(struct file *filp, char __user *ubuf,
                     size_t count, loff_t *ppos)
 {
        struct trace_array *tr = file_inode(filp)->i_private;
+       char *mask_str;
        int len;
 
-       mutex_lock(&tracing_cpumask_update_lock);
+       len = snprintf(NULL, 0, "%*pb\n",
+                      cpumask_pr_args(tr->tracing_cpumask)) + 1;
+       mask_str = kmalloc(len, GFP_KERNEL);
+       if (!mask_str)
+               return -ENOMEM;
 
-       len = snprintf(mask_str, count, "%*pb\n",
+       len = snprintf(mask_str, len, "%*pb\n",
                       cpumask_pr_args(tr->tracing_cpumask));
        if (len >= count) {
                count = -EINVAL;
                goto out_err;
        }
-       count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
+       count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
 
 out_err:
-       mutex_unlock(&tracing_cpumask_update_lock);
+       kfree(mask_str);
 
        return count;
 }
@@ -4228,8 +4221,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        if (err)
                goto err_unlock;
 
-       mutex_lock(&tracing_cpumask_update_lock);
-
        local_irq_disable();
        arch_spin_lock(&tr->max_lock);
        for_each_tracing_cpu(cpu) {
@@ -4252,8 +4243,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
        local_irq_enable();
 
        cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
-
-       mutex_unlock(&tracing_cpumask_update_lock);
        free_cpumask_var(tracing_cpumask_new);
 
        return count;
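
The tracing_cpumask_read() change above replaces the fixed NR_CPUS-sized static buffer (and the mutex that guarded it) with a buffer sized by a measuring snprintf(). A minimal user-space sketch of that size-then-format idiom, using a plain string in place of the cpumask, for illustration only:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const char *cpus = "0-3,8-11";                   /* stand-in for the printed cpumask */
        int len = snprintf(NULL, 0, "%s\n", cpus) + 1;   /* measure; +1 for the NUL */
        char *buf = malloc(len);

        if (!buf)
                return 1;
        snprintf(buf, len, "%s\n", cpus);                /* format into an exact-size buffer */
        fputs(buf, stdout);
        free(buf);
        return 0;
}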
index 734accc02418930280a5248013fa6fbd4a5868e2..3c7bfc4bf5e9981b687ca8dd4ed1cf890b38ee12 100644 (file)
@@ -209,6 +209,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
        if (__this_cpu_read(disable_stack_tracer) != 1)
                goto out;
 
+       /* If RCU is not watching, then saving the stack trace can fail */
+       if (!rcu_is_watching())
+               goto out;
+
        ip += MCOUNT_INSN_SIZE;
 
        check_stack(ip, &stack);
index ce74a4901d2b058595af031cf6c408c48dc5e1f1..ef1da2a5f9bd00689e4f78adffb04c867d8395cb 100644 (file)
@@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
                return retval;
        }
 
+       groups_sort(group_info);
        retval = set_current_groups(group_info);
        put_group_info(group_info);
 
index 8fdb710bfdd732fc3e6bfaa3110264de70c9af1d..43d18cb46308385865d14ce40906c4646d378063 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/hardirq.h>
 #include <linux/mempolicy.h>
 #include <linux/freezer.h>
-#include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
@@ -48,6 +47,7 @@
 #include <linux/nodemask.h>
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
+#include <linux/sched/isolation.h>
 
 #include "workqueue_internal.h"
 
@@ -1634,7 +1634,7 @@ static void worker_enter_idle(struct worker *worker)
                mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
 
        /*
-        * Sanity check nr_running.  Because wq_unbind_fn() releases
+        * Sanity check nr_running.  Because unbind_workers() releases
         * pool->lock between setting %WORKER_UNBOUND and zapping
         * nr_running, the warning may trigger spuriously.  Check iff
         * unbind is not in progress.
@@ -4510,9 +4510,8 @@ void show_workqueue_state(void)
  * cpu comes back online.
  */
 
-static void wq_unbind_fn(struct work_struct *work)
+static void unbind_workers(int cpu)
 {
-       int cpu = smp_processor_id();
        struct worker_pool *pool;
        struct worker *worker;
 
@@ -4589,16 +4588,6 @@ static void rebind_workers(struct worker_pool *pool)
 
        spin_lock_irq(&pool->lock);
 
-       /*
-        * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
-        * w/o preceding DOWN_PREPARE.  Work around it.  CPU hotplug is
-        * being reworked and this can go away in time.
-        */
-       if (!(pool->flags & POOL_DISASSOCIATED)) {
-               spin_unlock_irq(&pool->lock);
-               return;
-       }
-
        pool->flags &= ~POOL_DISASSOCIATED;
 
        for_each_pool_worker(worker, pool) {
@@ -4709,12 +4698,13 @@ int workqueue_online_cpu(unsigned int cpu)
 
 int workqueue_offline_cpu(unsigned int cpu)
 {
-       struct work_struct unbind_work;
        struct workqueue_struct *wq;
 
        /* unbinding per-cpu workers should happen on the local CPU */
-       INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
-       queue_work_on(cpu, system_highpri_wq, &unbind_work);
+       if (WARN_ON(cpu != smp_processor_id()))
+               return -1;
+
+       unbind_workers(cpu);
 
        /* update NUMA affinity of unbound workqueues */
        mutex_lock(&wq_pool_mutex);
@@ -4722,9 +4712,6 @@ int workqueue_offline_cpu(unsigned int cpu)
                wq_update_unbound_numa(wq, cpu, false);
        mutex_unlock(&wq_pool_mutex);
 
-       /* wait for per-cpu unbinding to finish */
-       flush_work(&unbind_work);
-       destroy_work_on_stack(&unbind_work);
        return 0;
 }
 
@@ -4957,6 +4944,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
        if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
                return -ENOMEM;
 
+       /*
+        * Not excluding isolated cpus on purpose.
+        * If the user wishes to include them, we allow that.
+        */
        cpumask_and(cpumask, cpumask, cpu_possible_mask);
        if (!cpumask_empty(cpumask)) {
                apply_wqattrs_lock();
@@ -5555,7 +5546,7 @@ int __init workqueue_init_early(void)
        WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
 
        BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
-       cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+       cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
 
        pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
 
index 947d3e2ed5c2f1a7fa1e001e4bf56b9c7a2b9d49..9d5b78aad4c5bcd59a927c654e8010e54269d750 100644 (file)
@@ -1099,8 +1099,6 @@ config PROVE_LOCKING
        select DEBUG_MUTEXES
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select DEBUG_LOCK_ALLOC
-       select LOCKDEP_CROSSRELEASE
-       select LOCKDEP_COMPLETIONS
        select TRACE_IRQFLAGS
        default n
        help
@@ -1170,37 +1168,6 @@ config LOCK_STAT
         CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
         (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
-config LOCKDEP_CROSSRELEASE
-       bool
-       help
-        This makes lockdep work for crosslock which is a lock allowed to
-        be released in a different context from the acquisition context.
-        Normally a lock must be released in the context acquiring the lock.
-        However, relaxing this constraint lets synchronization primitives
-        such as page locks or completions use the lock correctness
-        detector, lockdep.
-
-config LOCKDEP_COMPLETIONS
-       bool
-       help
-        A deadlock caused by wait_for_completion() and complete() can be
-        detected by lockdep using crossrelease feature.
-
-config BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
-       bool "Enable the boot parameter, crossrelease_fullstack"
-       depends on LOCKDEP_CROSSRELEASE
-       default n
-       help
-        The lockdep "cross-release" feature needs to record stack traces
-        (of calling functions) for all acquisitions, for eventual later
-        use during analysis. By default only a single caller is recorded,
-        because the unwind operation can be very expensive with deeper
-        stack chains.
-
-        However a boot parameter, crossrelease_fullstack, was
-        introduced since sometimes deeper traces are required for full
-        analysis. This option turns on the boot parameter.
-
 config DEBUG_LOCKDEP
        bool "Lock dependency engine debugging"
        depends on DEBUG_KERNEL && LOCKDEP
index ba4a9d165f1bed3c39651387def0008fc793fbd6..d3ff682fd4b8dac865276f5fefc7a1108c1bdb37 100644 (file)
@@ -603,6 +603,16 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
 }
 EXPORT_SYMBOL(rb_replace_node);
 
+void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new,
+                           struct rb_root_cached *root)
+{
+       rb_replace_node(victim, new, &root->rb_root);
+
+       if (root->rb_leftmost == victim)
+               root->rb_leftmost = new;
+}
+EXPORT_SYMBOL(rb_replace_node_cached);
+
 void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
                         struct rb_root *root)
 {
index aa8812ae6776ee31712fe88c58da4048ff9c31e4..9e97480892709957e127e9941710ce45f11ff724 100644 (file)
@@ -435,6 +435,41 @@ loop:
        return 0;
 }
 
+static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+{
+       struct bpf_insn *insn;
+
+       insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       /* Due to func address being non-const, we need to
+        * assemble this here.
+        */
+       insn[0] = BPF_MOV64_REG(R6, R1);
+       insn[1] = BPF_LD_ABS(BPF_B, 0);
+       insn[2] = BPF_LD_ABS(BPF_H, 0);
+       insn[3] = BPF_LD_ABS(BPF_W, 0);
+       insn[4] = BPF_MOV64_REG(R7, R6);
+       insn[5] = BPF_MOV64_IMM(R6, 0);
+       insn[6] = BPF_MOV64_REG(R1, R7);
+       insn[7] = BPF_MOV64_IMM(R2, 1);
+       insn[8] = BPF_MOV64_IMM(R3, 2);
+       insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                              bpf_skb_vlan_push_proto.func - __bpf_call_base);
+       insn[10] = BPF_MOV64_REG(R6, R7);
+       insn[11] = BPF_LD_ABS(BPF_B, 0);
+       insn[12] = BPF_LD_ABS(BPF_H, 0);
+       insn[13] = BPF_LD_ABS(BPF_W, 0);
+       insn[14] = BPF_MOV64_IMM(R0, 42);
+       insn[15] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = 16;
+
+       return 0;
+}
+
 static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
        unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
                {},
                { {0x1, 0x42 } },
        },
+       {
+               "LD_ABS with helper changing skb data",
+               { },
+               INTERNAL,
+               { 0x34 },
+               { { ETH_HLEN, 42 } },
+               .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
+       },
 };
 
 static struct net_device dev;
index 84b2dc76f140e922e2ed0d7c4d545b4d4ddf496d..b5f940ce0143ba061a183db0df3ef0dc17f57c72 100644 (file)
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
-       if (bdi_debug_register(bdi, dev_name(dev))) {
-               device_destroy(bdi_class, dev->devt);
-               return -ENOMEM;
-       }
        cgwb_bdi_register(bdi);
        bdi->dev = dev;
 
+       bdi_debug_register(bdi, dev_name(dev));
        set_bit(WB_registered, &bdi->wb.state);
 
        spin_lock_bh(&bdi_lock);
index d04ac1ec05598d64c2acfd7a9b95b4b4a7a81c36..1826f191e72c836c59970006528e39bd791209bc 100644 (file)
@@ -111,7 +111,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
        enum fixed_addresses idx;
        int i, slot;
 
-       WARN_ON(system_state != SYSTEM_BOOTING);
+       WARN_ON(system_state >= SYSTEM_RUNNING);
 
        slot = -1;
        for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
index 297c7238f7d4094a6ac4ab0dc72e04abb870972f..c64dca6e27c28c915ad4ce662de764d6da5ef6f1 100644 (file)
@@ -62,8 +62,10 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
         * get_user_pages_longterm() and disallow it for filesystem-dax
         * mappings.
         */
-       if (vma_is_fsdax(vma))
-               return -EOPNOTSUPP;
+       if (vma_is_fsdax(vma)) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
 
        if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
                vec->got_ref = true;
index d3fb60e5bfacd4c733957dc526c28c41bd2321d1..e0d82b6706d72d82637bca5eaef1e35e15a1abdf 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
  */
 static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 {
-       return pte_access_permitted(pte, WRITE) ||
+       return pte_write(pte) ||
                ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
 }
 
index 3a5c172af56039bb26007ea4cc5ec4ca0a9bf659..ea19742a5d60b1a6270629a024d88a13b9c5f3c1 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ again:
                if (pmd_protnone(pmd))
                        return hmm_vma_walk_clear(start, end, walk);
 
-               if (!pmd_access_permitted(pmd, write_fault))
+               if (write_fault && !pmd_write(pmd))
                        return hmm_vma_walk_clear(start, end, walk);
 
                pfn = pmd_pfn(pmd) + pte_index(addr);
-               flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0;
+               flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
                for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
                        pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
                return 0;
@@ -456,11 +456,11 @@ again:
                        continue;
                }
 
-               if (!pte_access_permitted(pte, write_fault))
+               if (write_fault && !pte_write(pte))
                        goto fault;
 
                pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-               pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0;
+               pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
                continue;
 
 fault:
index 2f2f5e77490278f58c6e9a923899255efff77551..0e7ded98d114d184877d2fc9bd0f02c3187f2ed5 100644 (file)
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
         */
        WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
 
-       if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE))
+       if (flags & FOLL_WRITE && !pmd_write(*pmd))
                return NULL;
 
        if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 
        assert_spin_locked(pud_lockptr(mm, pud));
 
-       if (!pud_access_permitted(*pud, flags & FOLL_WRITE))
+       if (flags & FOLL_WRITE && !pud_write(*pud))
                return NULL;
 
        if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
  */
 static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
 {
-       return pmd_access_permitted(pmd, WRITE) ||
+       return pmd_write(pmd) ||
               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
 }
 
index 3d4781756d50fef924f52c0d9cb6cb0cbddd479f..d73c14294f3a61c2385741b447aa31d203a2bc72 100644 (file)
@@ -1523,7 +1523,7 @@ static void kmemleak_scan(void)
                        if (page_count(page) == 0)
                                continue;
                        scan_block(page, page + 1, NULL);
-                       if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page))))
+                       if (!(pfn & 63))
                                cond_resched();
                }
        }
index 5eb3d2524bdc28239b33a0ac6e385fa5a5b9aaf9..ca5674cbaff2b65c4e51086e5922fbbd274f2cfa 100644 (file)
@@ -3831,7 +3831,8 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
        return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+/* `inline' is required to avoid gcc 4.1.2 build error */
+static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
        if (vma_is_anonymous(vmf->vma))
                return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3948,7 +3949,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
        if (unlikely(!pte_same(*vmf->pte, entry)))
                goto unlock;
        if (vmf->flags & FAULT_FLAG_WRITE) {
-               if (!pte_access_permitted(entry, WRITE))
+               if (!pte_write(entry))
                        return do_wp_page(vmf);
                entry = pte_mkdirty(entry);
        }
@@ -4013,7 +4014,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
                        /* NUMA case for anonymous PUDs would go here */
 
-                       if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
+                       if (dirty && !pud_write(orig_pud)) {
                                ret = wp_huge_pud(&vmf, orig_pud);
                                if (!(ret & VM_FAULT_FALLBACK))
                                        return ret;
@@ -4046,7 +4047,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
                        if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
                                return do_huge_pmd_numa_page(&vmf, orig_pmd);
 
-                       if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
+                       if (dirty && !pmd_write(orig_pmd)) {
                                ret = wp_huge_pmd(&vmf, orig_pmd);
                                if (!(ret & VM_FAULT_FALLBACK))
                                        return ret;
@@ -4336,7 +4337,7 @@ int follow_phys(struct vm_area_struct *vma,
                goto out;
        pte = *ptep;
 
-       if (!pte_access_permitted(pte, flags & FOLL_WRITE))
+       if ((flags & FOLL_WRITE) && !pte_write(pte))
                goto unlock;
 
        *prot = pgprot_val(pte_pgprot(pte));
index a4d5468212149db8a4cf20f9917c7bf48231a9ce..9efdc021ad2202fc9ebd7e55fe572813136d2f2c 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3019,20 +3019,20 @@ void exit_mmap(struct mm_struct *mm)
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        unmap_vmas(&tlb, vma, 0, -1);
 
-       set_bit(MMF_OOM_SKIP, &mm->flags);
-       if (unlikely(tsk_is_oom_victim(current))) {
+       if (unlikely(mm_is_oom_victim(mm))) {
                /*
                 * Wait for oom_reap_task() to stop working on this
                 * mm. Because MMF_OOM_SKIP is already set before
                 * calling down_read(), oom_reap_task() will not run
                 * on this "mm" post up_write().
                 *
-                * tsk_is_oom_victim() cannot be set from under us
-                * either because current->mm is already set to NULL
+                * mm_is_oom_victim() cannot be set from under us
+                * either because victim->mm is already set to NULL
                 * under task_lock before calling mmput and oom_mm is
-                * set not NULL by the OOM killer only if current->mm
+                * set not NULL by the OOM killer only if victim->mm
                 * is found not NULL while holding the task_lock.
                 */
+               set_bit(MMF_OOM_SKIP, &mm->flags);
                down_write(&mm->mmap_sem);
                up_write(&mm->mmap_sem);
        }
index c957be32b27a9e7a17a6e33e69a31b1b6fa8e820..29f855551efef89d6c251075828bc0cd79da1842 100644 (file)
@@ -683,8 +683,10 @@ static void mark_oom_victim(struct task_struct *tsk)
                return;
 
        /* oom_mm is bound to the signal struct life time. */
-       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
+       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
                mmgrab(tsk->signal->oom_mm);
+               set_bit(MMF_OOM_VICTIM, &mm->flags);
+       }
 
        /*
         * Make sure that the task is woken up from uninterruptible sleep
index 73f5d4556b3d0b7218bea0cb9bb0fdd1f1cb3cdd..7e5e775e97f400d8a050effc41f68a8261bd9a23 100644 (file)
@@ -2684,6 +2684,7 @@ void free_unref_page_list(struct list_head *list)
 {
        struct page *page, *next;
        unsigned long flags, pfn;
+       int batch_count = 0;
 
        /* Prepare pages for freeing */
        list_for_each_entry_safe(page, next, list, lru) {
@@ -2700,6 +2701,16 @@ void free_unref_page_list(struct list_head *list)
                set_page_private(page, 0);
                trace_mm_page_free_batched(page);
                free_unref_page_commit(page, pfn);
+
+               /*
+                * Guard against excessive IRQ disabled times when we get
+                * a large list of pages to free.
+                */
+               if (++batch_count == SWAP_CLUSTER_MAX) {
+                       local_irq_restore(flags);
+                       batch_count = 0;
+                       local_irq_save(flags);
+               }
        }
        local_irq_restore(flags);
 }
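
The batch_count logic above bounds how long interrupts stay disabled while a long list of pages is freed. A rough user-space analogue of the same design, substituting a pthread mutex for local_irq_save()/local_irq_restore() and an arbitrary BATCH constant for SWAP_CLUSTER_MAX (both assumptions for illustration; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#define BATCH 32   /* stand-in for SWAP_CLUSTER_MAX */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Drain a long list while periodically dropping the lock so other waiters
 * (in the kernel: interrupts) are not locked out for the whole drain. */
static void drain(int *items, int n)
{
        int batch = 0;
        int i;

        pthread_mutex_lock(&lock);
        for (i = 0; i < n; i++) {
                items[i] = 0;                         /* "free" one item */
                if (++batch == BATCH) {
                        pthread_mutex_unlock(&lock);  /* give waiters a window */
                        batch = 0;
                        pthread_mutex_lock(&lock);
                }
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        int items[1000];
        int i;

        for (i = 0; i < 1000; i++)
                items[i] = i;
        drain(items, 1000);
        puts("drained");
        return 0;
}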
index 79e3549cab0f40f30916499f2acca7622379fdc5..50e7fdf84055151d8c7e8bb220f7a73e96b7f3e4 100644 (file)
@@ -2719,7 +2719,11 @@ void __init setup_per_cpu_areas(void)
 
        if (pcpu_setup_first_chunk(ai, fc) < 0)
                panic("Failed to initialize percpu areas.");
+#ifdef CONFIG_CRIS
+#warning "the CRIS architecture has physical and virtual addresses confused"
+#else
        pcpu_free_alloc_info(ai);
+#endif
 }
 
 #endif /* CONFIG_SMP */
index 183e996dde5ff37a8881e9c223a348de947bf890..4e51ef954026bd15e5bef41167459970976faab0 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1584,11 +1584,8 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
                       *dbg_redzone2(cachep, objp));
        }
 
-       if (cachep->flags & SLAB_STORE_USER) {
-               pr_err("Last user: [<%p>](%pSR)\n",
-                      *dbg_userword(cachep, objp),
-                      *dbg_userword(cachep, objp));
-       }
+       if (cachep->flags & SLAB_STORE_USER)
+               pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
        realobj = (char *)objp + obj_offset(cachep);
        size = cachep->object_size;
        for (i = 0; i < size && lines; i += 16, lines--) {
@@ -1621,7 +1618,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
                        /* Mismatch ! */
                        /* Print header */
                        if (lines == 0) {
-                               pr_err("Slab corruption (%s): %s start=%p, len=%d\n",
+                               pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
                                       print_tainted(), cachep->name,
                                       realobj, size);
                                print_objinfo(cachep, objp, 0);
@@ -1650,13 +1647,13 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
                if (objnr) {
                        objp = index_to_obj(cachep, page, objnr - 1);
                        realobj = (char *)objp + obj_offset(cachep);
-                       pr_err("Prev obj: start=%p, len=%d\n", realobj, size);
+                       pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
                if (objnr + 1 < cachep->num) {
                        objp = index_to_obj(cachep, page, objnr + 1);
                        realobj = (char *)objp + obj_offset(cachep);
-                       pr_err("Next obj: start=%p, len=%d\n", realobj, size);
+                       pr_err("Next obj: start=%px, len=%d\n", realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
        }
@@ -2608,7 +2605,7 @@ static void slab_put_obj(struct kmem_cache *cachep,
        /* Verify double free bug */
        for (i = page->active; i < cachep->num; i++) {
                if (get_free_obj(page, i) == objnr) {
-                       pr_err("slab: double free detected in cache '%s', objp %p\n",
+                       pr_err("slab: double free detected in cache '%s', objp %px\n",
                               cachep->name, objp);
                        BUG();
                }
@@ -2772,7 +2769,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
        else
                slab_error(cache, "memory outside object was overwritten");
 
-       pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
+       pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
               obj, redzone1, redzone2);
 }
 
@@ -3078,7 +3075,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
                                *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
                        slab_error(cachep, "double free, or memory outside object was overwritten");
-                       pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
+                       pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
                               objp, *dbg_redzone1(cachep, objp),
                               *dbg_redzone2(cachep, objp));
                }
@@ -3091,7 +3088,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                cachep->ctor(objp);
        if (ARCH_SLAB_MINALIGN &&
            ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
-               pr_err("0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
+               pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",
                       objp, (int)ARCH_SLAB_MINALIGN);
        }
        return objp;
@@ -4283,7 +4280,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
                return;
        }
 #endif
-       seq_printf(m, "%p", (void *)address);
+       seq_printf(m, "%px", (void *)address);
 }
 
 static int leaks_show(struct seq_file *m, void *p)
index 1b659ab652fb0c70f964d0bb292e99527a4eda8f..bbe8414b6ee7d21f86e5ab9302ebfc875dbe33d3 100644 (file)
@@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
        orig_node->last_seen = jiffies;
 
        /* find packet count of corresponding one hop neighbor */
-       spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+       spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
        if_num = if_incoming->if_num;
        orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
        neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
@@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
        } else {
                neigh_rq_count = 0;
        }
-       spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+       spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
 
        /* pay attention to not get a value bigger than 100 % */
        if (orig_eq_count > neigh_rq_count)
index 341ceab8338d829d14a48831bef805bb04af0a8f..e0e2bfcd6b3efd73f2567d23ce304951c9e2c3f0 100644 (file)
@@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
        }
 
        orig_gw = batadv_gw_node_get(bat_priv, orig_node);
-       if (!orig_node)
+       if (!orig_gw)
                goto out;
 
        if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
index a98cf1104a30a30e66fb6018bef59dea83dc4b7a..ebe6e38934e46ed5de4d30204e791dbe40285fcc 100644 (file)
@@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb,
         */
        if (skb->priority >= 256 && skb->priority <= 263)
                frag_header.priority = skb->priority - 256;
+       else
+               frag_header.priority = 0;
 
        ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
        ether_addr_copy(frag_header.dest, orig_node->orig);
index 15cd2139381e17f0b501dd5294631a1a3315c587..ebc4e2241c770d826fa8e731ba46ec043fe1992d 100644 (file)
@@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
 
 /**
  * batadv_tp_sender_timeout - timer that fires in case of packet loss
- * @arg: address of the related tp_vars
+ * @t: address of the timer_list inside tp_vars
  *
  * If fired it means that there was packet loss.
  * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
@@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
 /**
  * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
  *  reached without received ack
- * @arg: address of the related tp_vars
+ * @t: address of the timer_list inside tp_vars
  */
 static void batadv_tp_receiver_shutdown(struct timer_list *t)
 {
index d0ef0a8e8831920cb86fc767eb0d756bf89feb46..015f465c514b28564c9e91eec40dc041b765fe25 100644 (file)
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
        struct net_bridge *br = netdev_priv(dev);
        int err;
 
+       err = register_netdevice(dev);
+       if (err)
+               return err;
+
        if (tb[IFLA_ADDRESS]) {
                spin_lock_bh(&br->lock);
                br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
                spin_unlock_bh(&br->lock);
        }
 
-       err = register_netdevice(dev);
-       if (err)
-               return err;
-
        err = br_changelink(dev, tb, data, extack);
        if (err)
-               unregister_netdevice(dev);
+               br_dev_delete(dev, NULL);
+
        return err;
 }
 
index f47e96b623088ae354947787ef17217cd32ae64e..01ee854454a8089cdd49e2c8964a99f6a2d74730 100644 (file)
@@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                                     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
                                     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
                        goto do_drop;
-               if (troom > 0 && __skb_linearize(skb))
+               if (skb_linearize(skb))
                        goto do_drop;
        }
 
index b797832565d34ccefb374ee87f9cbc460779a484..60a71be75aea063b418a48ade2a1e1c7804ab35c 100644 (file)
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
        spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
-               get_net(peer);
+               peer = maybe_get_net(peer);
        spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();
 
index 1c4810919a0a35900d45a659de0cd780b7e500d3..b9057478d69c8ad02ea7b4ba8d6f612e7792a738 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/skbuff.h>
index 6b0ff396fa9dc58fed483597d459c66243be4cd2..a3cb0be4c6f3b5b519b60ac8dde73c33e902763a 100644 (file)
@@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        u32 d_off;
 
        if (!num_frags)
-               return 0;
+               goto release;
 
        if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
                return -EINVAL;
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
        skb_shinfo(skb)->nr_frags = new_frags;
 
+release:
        skb_zcopy_clear(skb, false);
        return 0;
 }
@@ -3654,8 +3655,6 @@ normal:
 
                skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
                                              SKBTX_SHARED_FRAG;
-               if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
-                       goto err;
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
 
                        if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
                                goto err;
+                       if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+                               goto err;
 
                        *nskb_frag = *frag;
                        __skb_frag_ref(nskb_frag);
@@ -4293,7 +4294,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
        struct sock *sk = skb->sk;
 
        if (!skb_may_tx_timestamp(sk, false))
-               return;
+               goto err;
 
        /* Take a reference to prevent skb_orphan() from freeing the socket,
         * but only if the socket refcount is not zero.
@@ -4302,7 +4303,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
                *skb_hwtstamps(skb) = *hwtstamps;
                __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
                sock_put(sk);
+               return;
        }
+
+err:
+       kfree_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
index d6e7a642493b03223ab9890247983d9d1499cea0..a95a55f7913746bab3aa7a993265885ece25f35a 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
 #include <linux/mdio.h>
-#include <linux/list.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
index a4573bccd6da7b6763016d4f07d3032d44483d99..7a93359fbc7229389fc7bec67889ca1115f47a69 100644 (file)
@@ -1428,7 +1428,7 @@ skip:
 
 static bool inetdev_valid_mtu(unsigned int mtu)
 {
-       return mtu >= 68;
+       return mtu >= IPV4_MIN_MTU;
 }
 
 static void inetdev_send_gratuitous_arp(struct net_device *dev,
index f52d27a422c37298b2ad0c1dbba0e5307f1a46b6..08259d078b1ca821c581aeb34251c79a9aba8c8d 100644 (file)
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
 
 static void ip_fib_net_exit(struct net *net)
 {
-       unsigned int i;
+       int i;
 
        rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
-       for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+       /* Destroy the tables in reverse order to guarantee that the
+        * local table, ID 255, is destroyed before the main table, ID
+        * 254. This is necessary as the local table may contain
+        * references to data contained in the main table.
+        */
+       for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
                struct hlist_head *head = &net->ipv4.fib_table_hash[i];
                struct hlist_node *tmp;
                struct fib_table *tb;
index f04d944f8abe0bfbb840837bb35d28fe6d8d25d0..c586597da20dbb0e46eb0f693fd65bccfc8f3633 100644 (file)
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 
        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                int type = nla_type(nla);
-               u32 val;
+               u32 fi_val, val;
 
                if (!type)
                        continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
                        val = nla_get_u32(nla);
                }
 
-               if (fi->fib_metrics->metrics[type - 1] != val)
+               fi_val = fi->fib_metrics->metrics[type - 1];
+               if (type == RTAX_FEATURES)
+                       fi_val &= ~DST_FEATURE_ECN_CA;
+
+               if (fi_val != val)
                        return false;
        }
 
index d1f8f302dbf3ed5a079f27efa6eeaf802de40243..726f6b6082748896686ae603546fa189348f9142 100644 (file)
@@ -89,6 +89,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/times.h>
 #include <linux/pkt_sched.h>
+#include <linux/byteorder/generic.h>
 
 #include <net/net_namespace.h>
 #include <net/arp.h>
@@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
        return scount;
 }
 
+/* source address selection per RFC 3376 section 4.2.13 */
+static __be32 igmpv3_get_srcaddr(struct net_device *dev,
+                                const struct flowi4 *fl4)
+{
+       struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+       if (!in_dev)
+               return htonl(INADDR_ANY);
+
+       for_ifa(in_dev) {
+               if (inet_ifa_match(fl4->saddr, ifa))
+                       return fl4->saddr;
+       } endfor_ifa(in_dev);
+
+       return htonl(INADDR_ANY);
+}
+
 static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
 {
        struct sk_buff *skb;
@@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
        pip->frag_off = htons(IP_DF);
        pip->ttl      = 1;
        pip->daddr    = fl4.daddr;
-       pip->saddr    = fl4.saddr;
+       pip->saddr    = igmpv3_get_srcaddr(dev, &fl4);
        pip->protocol = IPPROTO_IGMP;
        pip->tot_len  = 0;      /* filled in later */
        ip_select_ident(net, skb, NULL);
@@ -404,16 +422,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
-       int type, struct igmpv3_grec **ppgr)
+       int type, struct igmpv3_grec **ppgr, unsigned int mtu)
 {
        struct net_device *dev = pmc->interface->dev;
        struct igmpv3_report *pih;
        struct igmpv3_grec *pgr;
 
-       if (!skb)
-               skb = igmpv3_newpack(dev, dev->mtu);
-       if (!skb)
-               return NULL;
+       if (!skb) {
+               skb = igmpv3_newpack(dev, mtu);
+               if (!skb)
+                       return NULL;
+       }
        pgr = skb_put(skb, sizeof(struct igmpv3_grec));
        pgr->grec_type = type;
        pgr->grec_auxwords = 0;
@@ -436,12 +455,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
        struct igmpv3_grec *pgr = NULL;
        struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
        int scount, stotal, first, isquery, truncate;
+       unsigned int mtu;
 
        if (pmc->multiaddr == IGMP_ALL_HOSTS)
                return skb;
        if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
                return skb;
 
+       mtu = READ_ONCE(dev->mtu);
+       if (mtu < IPV4_MIN_MTU)
+               return skb;
+
        isquery = type == IGMPV3_MODE_IS_INCLUDE ||
                  type == IGMPV3_MODE_IS_EXCLUDE;
        truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
@@ -462,7 +486,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
                    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
                        if (skb)
                                igmpv3_sendpack(skb);
-                       skb = igmpv3_newpack(dev, dev->mtu);
+                       skb = igmpv3_newpack(dev, mtu);
                }
        }
        first = 1;
@@ -498,12 +522,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
                                pgr->grec_nsrcs = htons(scount);
                        if (skb)
                                igmpv3_sendpack(skb);
-                       skb = igmpv3_newpack(dev, dev->mtu);
+                       skb = igmpv3_newpack(dev, mtu);
                        first = 1;
                        scount = 0;
                }
                if (first) {
-                       skb = add_grhead(skb, pmc, type, &pgr);
+                       skb = add_grhead(skb, pmc, type, &pgr, mtu);
                        first = 0;
                }
                if (!skb)
@@ -538,7 +562,7 @@ empty_source:
                                igmpv3_sendpack(skb);
                                skb = NULL; /* add_grhead will get a new one */
                        }
-                       skb = add_grhead(skb, pmc, type, &pgr);
+                       skb = add_grhead(skb, pmc, type, &pgr, mtu);
                }
        }
        if (pgr)
index bb6239169b1ab943a6418494dab5018843bcde3c..45ffd3d045d240cad8e4d0ed8dd0dd7da997bf9e 100644 (file)
@@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
        len = gre_hdr_len + sizeof(*ershdr);
 
        if (unlikely(!pskb_may_pull(skb, len)))
-               return -ENOMEM;
+               return PACKET_REJECT;
 
        iph = ip_hdr(skb);
        ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
 static void ipgre_tap_setup(struct net_device *dev)
 {
        ether_setup(dev);
+       dev->max_mtu = 0;
        dev->netdev_ops = &gre_tap_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
index fe6fee728ce49d01b55aa478698e1a3bcf9a3bdb..5ddb1cb52bd405ed10cce43195a25607d136efbf 100644 (file)
@@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
        dev->needed_headroom = t_hlen + hlen;
        mtu -= (dev->hard_header_len + t_hlen);
 
-       if (mtu < 68)
-               mtu = 68;
+       if (mtu < IPV4_MIN_MTU)
+               mtu = IPV4_MIN_MTU;
 
        return mtu;
 }
index f88221aebc9d7b61cf2c09f2b3d2351c4095f64f..0c3c944a7b7201f74dace535c8a26d58b8039a1f 100644 (file)
@@ -373,7 +373,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
                                        if (!xt_find_jump_offset(offsets, newpos,
                                                                 newinfo->number))
                                                return 0;
-                                       e = entry0 + newpos;
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
index 4cbe5e80f3bf079755cd08f33c24a4077c6c4a63..2e0d339028bbcb6766f92e5b87d70866a419b893 100644 (file)
@@ -439,7 +439,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
                                        if (!xt_find_jump_offset(offsets, newpos,
                                                                 newinfo->number))
                                                return 0;
-                                       e = entry0 + newpos;
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
index 17b4ca562944c35b50015bbc40aa580ca62ddfb7..69060e3abe8598b350e6bfe5815a702c4b3d2ade 100644 (file)
@@ -813,12 +813,13 @@ static int clusterip_net_init(struct net *net)
 
 static void clusterip_net_exit(struct net *net)
 {
-#ifdef CONFIG_PROC_FS
        struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+#ifdef CONFIG_PROC_FS
        proc_remove(cn->procdir);
        cn->procdir = NULL;
 #endif
        nf_unregister_net_hook(net, &cip_arp_ops);
+       WARN_ON_ONCE(!list_empty(&cn->configs));
 }
 
 static struct pernet_operations clusterip_net_ops = {
index 33b70bfd1122f08f4897ea6a68eb51e3a74bb1e5..125c1eab3eaa6d894804c3aa8918aa7fcc736ca0 100644 (file)
@@ -513,11 +513,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        int err;
        struct ip_options_data opt_copy;
        struct raw_frag_vec rfv;
+       int hdrincl;
 
        err = -EMSGSIZE;
        if (len > 0xFFFF)
                goto out;
 
+       /* hdrincl should be READ_ONCE(inet->hdrincl)
+        * but READ_ONCE() doesn't work with bit fields
+        */
+       hdrincl = inet->hdrincl;
        /*
         *      Check the flags.
         */
@@ -593,7 +598,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                /* Linux does not mangle headers on raw sockets,
                 * so that IP options + IP_HDRINCL is non-sense.
                 */
-               if (inet->hdrincl)
+               if (hdrincl)
                        goto done;
                if (ipc.opt->opt.srr) {
                        if (!daddr)
@@ -615,12 +620,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
                           RT_SCOPE_UNIVERSE,
-                          inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+                          hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk) |
-                           (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+                           (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
                           daddr, saddr, 0, 0, sk->sk_uid);
 
-       if (!inet->hdrincl) {
+       if (!hdrincl) {
                rfv.msg = msg;
                rfv.hlen = 0;
 
@@ -645,7 +650,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                goto do_confirm;
 back_from_confirm:
 
-       if (inet->hdrincl)
+       if (hdrincl)
                err = raw_send_hdrinc(sk, &fl4, msg, len,
                                      &rt, msg->msg_flags, &ipc.sockc);
 
index 9550cc42de2d9ba4cca6d961a2a3bca501755a69..45f750e85714da11f569ae0c6522f1cc56c6d2a2 100644 (file)
@@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
        u32 new_sample = tp->rcv_rtt_est.rtt_us;
        long m = sample;
 
-       if (m == 0)
-               m = 1;
-
        if (new_sample != 0) {
                /* If we sample in larger samples in the non-timestamp
                 * case, we could grossly overestimate the RTT especially
@@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
        if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
                return;
        delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
+       if (!delta_us)
+               delta_us = 1;
        tcp_rcv_rtt_update(tp, delta_us, 1);
 
 new_measure:
@@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
            (TCP_SKB_CB(skb)->end_seq -
             TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
                u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-               u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+               u32 delta_us;
 
+               if (!delta)
+                       delta = 1;
+               delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
                tcp_rcv_rtt_update(tp, delta_us, 0);
        }
 }
index 77ea45da0fe9c746907a312989658af3ad3b198d..94e28350f4205b1a57809d5471d7e2ade51f5196 100644 (file)
@@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent,
                        0,
-                       tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
+                       tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
                                          AF_INET),
                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
                        ip_hdr(skb)->tos);
index 16df6dd44b988a128d97df3a7953437499a216e8..968fda1983762e6d7c078a28ccfcbd9066788daf 100644 (file)
@@ -264,6 +264,7 @@ void tcp_delack_timer_handler(struct sock *sk)
                        icsk->icsk_ack.pingpong = 0;
                        icsk->icsk_ack.ato      = TCP_ATO_MIN;
                }
+               tcp_mstamp_refresh(tcp_sk(sk));
                tcp_send_ack(sk);
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
        }
@@ -632,6 +633,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
                goto out;
        }
 
+       tcp_mstamp_refresh(tp);
        if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
                if (tp->linger2 >= 0) {
                        const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
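Both tcp_timer.c handlers above now refresh the socket's cached microsecond clock (tcp_mstamp_refresh()) before transmitting, since timer callbacks run outside the normal receive path that would otherwise keep the cached stamp current. A hedged userspace analogue of "refresh the cached clock before acting on it" (names invented for the example):

#include <stdio.h>
#include <time.h>

static struct timespec cached_now;   /* stands in for tp->tcp_mstamp */

/* Refresh the cached timestamp; callers running from timer-like
 * contexts must do this before using cached_now for any decision. */
static void mstamp_refresh(void)
{
    clock_gettime(CLOCK_MONOTONIC, &cached_now);
}

static void timer_handler(void)
{
    mstamp_refresh();            /* otherwise cached_now may be stale */
    printf("acting at %ld.%09ld\n",
           (long)cached_now.tv_sec, cached_now.tv_nsec);
}

int main(void)
{
    timer_handler();
    return 0;
}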
index c26f71234b9c01a82ec9d40423ee28957468ed46..c9441ca4539936486291147a47a84ef1b2ecf095 100644 (file)
@@ -210,7 +210,6 @@ lookup_protocol:
        np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
        np->mc_loop     = 1;
        np->pmtudisc    = IPV6_PMTUDISC_WANT;
-       np->autoflowlabel = ip6_default_np_autolabel(net);
        np->repflow     = net->ipv6.sysctl.flowlabel_reflect;
        sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
 
index 4cfd8e0696fe77f6d7af7ca3579a2418aef972f6..416c8913f132c0c662ed811f74b6127083ad22a8 100644 (file)
@@ -1308,6 +1308,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
 
        ether_setup(dev);
 
+       dev->max_mtu = 0;
        dev->netdev_ops = &ip6gre_tap_netdev_ops;
        dev->needs_free_netdev = true;
        dev->priv_destructor = ip6gre_dev_free;
index 5110a418cc4d0c1040506394460cb482698d8c15..f7dd51c4231415fd1321fd431194d896ea2d1689 100644 (file)
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+       if (!np->autoflowlabel_set)
+               return ip6_default_np_autolabel(net);
+       else
+               return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                hlimit = ip6_dst_hoplimit(dst);
 
        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                                    np->autoflowlabel, fl6));
+                               ip6_autoflowlabel(net, np), fl6));
 
        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                       np->autoflowlabel, fl6));
+                                       ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
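Taken together, the af_inet6.c, ip6_output.c and ipv6_sockglue.c hunks turn autoflowlabel into "use the explicit per-socket setting if one was made, otherwise the namespace default": the field is no longer initialised at socket creation, a companion autoflowlabel_set bit records whether userspace ever set it, and ip6_autoflowlabel() falls back to the sysctl default only when it did not. The sketch below shows the same idea with invented names; it is not the kernel structure layout.

#include <stdbool.h>
#include <stdio.h>

struct sock_opts {
    unsigned int autoflowlabel     : 1;  /* value set by the application */
    unsigned int autoflowlabel_set : 1;  /* did the application set it?  */
};

static bool ns_default = true;           /* per-namespace sysctl default */

/* Prefer the explicit per-socket setting, otherwise the namespace default. */
static bool effective_autoflowlabel(const struct sock_opts *sk)
{
    return sk->autoflowlabel_set ? sk->autoflowlabel : ns_default;
}

int main(void)
{
    struct sock_opts untouched = { 0 };
    struct sock_opts disabled  = { .autoflowlabel = 0, .autoflowlabel_set = 1 };

    printf("%d %d\n", effective_autoflowlabel(&untouched),
           effective_autoflowlabel(&disabled));
    return 0;
}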
index db84f523656ddf876e1971c416ee03a6a1794d9d..931c38f6ff4a42fb17cf129cf6035706a24176dc 100644 (file)
@@ -1123,8 +1123,13 @@ route_lookup:
                max_headroom += 8;
                mtu -= 8;
        }
-       if (mtu < IPV6_MIN_MTU)
-               mtu = IPV6_MIN_MTU;
+       if (skb->protocol == htons(ETH_P_IPV6)) {
+               if (mtu < IPV6_MIN_MTU)
+                       mtu = IPV6_MIN_MTU;
+       } else if (mtu < 576) {
+               mtu = 576;
+       }
+
        if (skb_dst(skb) && !t->parms.collect_md)
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
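The ip6_tunnel.c hunk stops forcing every inner packet's MTU up to IPV6_MIN_MTU (1280): IPv6 payloads keep the 1280 floor, while other payloads are only clamped to 576. A compact sketch of that per-family clamp; the constants follow the diff, the IPV4_MIN_MTU name and the function are mine.

#include <stdio.h>

#define IPV6_MIN_MTU 1280
#define IPV4_MIN_MTU 576     /* floor used for non-IPv6 inner packets */

static unsigned int clamp_tunnel_mtu(unsigned int mtu, int inner_is_ipv6)
{
    if (inner_is_ipv6) {
        if (mtu < IPV6_MIN_MTU)
            mtu = IPV6_MIN_MTU;
    } else if (mtu < IPV4_MIN_MTU) {
        mtu = IPV4_MIN_MTU;
    }
    return mtu;
}

int main(void)
{
    printf("%u %u\n", clamp_tunnel_mtu(600, 1), clamp_tunnel_mtu(600, 0));
    return 0;
}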
index b9404feabd7857fe0873fbc4f346d281f3600807..2d4680e0376f41deee6c999eadaf9409353e0b4a 100644 (file)
@@ -886,6 +886,7 @@ pref_skip_coa:
                break;
        case IPV6_AUTOFLOWLABEL:
                np->autoflowlabel = valbool;
+               np->autoflowlabel_set = 1;
                retv = 0;
                break;
        case IPV6_RECVFRAGSIZE:
index fc6d7d143f2c29aab9a3f56eae02e5337e65a97b..844642682b8363c4c32d329ed92474f834a59618 100644 (file)
@@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
 }
 
 static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-       int type, struct mld2_grec **ppgr)
+       int type, struct mld2_grec **ppgr, unsigned int mtu)
 {
-       struct net_device *dev = pmc->idev->dev;
        struct mld2_report *pmr;
        struct mld2_grec *pgr;
 
-       if (!skb)
-               skb = mld_newpack(pmc->idev, dev->mtu);
-       if (!skb)
-               return NULL;
+       if (!skb) {
+               skb = mld_newpack(pmc->idev, mtu);
+               if (!skb)
+                       return NULL;
+       }
        pgr = skb_put(skb, sizeof(struct mld2_grec));
        pgr->grec_type = type;
        pgr->grec_auxwords = 0;
@@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
        struct mld2_grec *pgr = NULL;
        struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
        int scount, stotal, first, isquery, truncate;
+       unsigned int mtu;
 
        if (pmc->mca_flags & MAF_NOREPORT)
                return skb;
 
+       mtu = READ_ONCE(dev->mtu);
+       if (mtu < IPV6_MIN_MTU)
+               return skb;
+
        isquery = type == MLD2_MODE_IS_INCLUDE ||
                  type == MLD2_MODE_IS_EXCLUDE;
        truncate = type == MLD2_MODE_IS_EXCLUDE ||
@@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
                    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
                        if (skb)
                                mld_sendpack(skb);
-                       skb = mld_newpack(idev, dev->mtu);
+                       skb = mld_newpack(idev, mtu);
                }
        }
        first = 1;
@@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
                                pgr->grec_nsrcs = htons(scount);
                        if (skb)
                                mld_sendpack(skb);
-                       skb = mld_newpack(idev, dev->mtu);
+                       skb = mld_newpack(idev, mtu);
                        first = 1;
                        scount = 0;
                }
                if (first) {
-                       skb = add_grhead(skb, pmc, type, &pgr);
+                       skb = add_grhead(skb, pmc, type, &pgr, mtu);
                        first = 0;
                }
                if (!skb)
@@ -1814,7 +1819,7 @@ empty_source:
                                mld_sendpack(skb);
                                skb = NULL; /* add_grhead will get a new one */
                        }
-                       skb = add_grhead(skb, pmc, type, &pgr);
+                       skb = add_grhead(skb, pmc, type, &pgr, mtu);
                }
        }
        if (pgr)
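In mcast.c the device MTU is now read once with READ_ONCE(), validated against IPV6_MIN_MTU, and the snapshot is passed down to mld_newpack()/add_grhead(), so a concurrent MTU change cannot give the different allocation sites different or undersized values. A userspace sketch of "snapshot, validate, pass down" with illustrative names:

#include <stdatomic.h>
#include <stdio.h>

#define MIN_MTU 1280

static _Atomic unsigned int dev_mtu = 1500;   /* may change concurrently */

static void build_packet(unsigned int mtu)
{
    printf("allocating report sized for mtu=%u\n", mtu);
}

static int send_reports(void)
{
    /* One snapshot for the whole operation. */
    unsigned int mtu = atomic_load_explicit(&dev_mtu, memory_order_relaxed);

    if (mtu < MIN_MTU)       /* refuse to work with an undersized value */
        return -1;

    build_packet(mtu);       /* every callee sees the same mtu */
    build_packet(mtu);
    return 0;
}

int main(void)
{
    return send_reports() ? 1 : 0;
}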
index f06e25065a342e361d7ae68ae1d60304b3f43f39..1d7ae93663351297395208f2c9a65bd5fba236e5 100644 (file)
@@ -458,7 +458,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
                                        if (!xt_find_jump_offset(offsets, newpos,
                                                                 newinfo->number))
                                                return 0;
-                                       e = entry0 + newpos;
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
index 2b1a15846f9ac1f40d45aef52af6aab92d515408..92c0047e7e33dc5925054c41143fe200db06f125 100644 (file)
@@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
 
        if (range->flags & NF_NAT_RANGE_MAP_IPS)
                return -EINVAL;
-       return 0;
+       return nf_ct_netns_get(par->net, par->family);
+}
+
+static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+       nf_ct_netns_put(par->net, par->family);
 }
 
 static struct xt_target masquerade_tg6_reg __read_mostly = {
        .name           = "MASQUERADE",
        .family         = NFPROTO_IPV6,
        .checkentry     = masquerade_tg6_checkentry,
+       .destroy        = masquerade_tg6_destroy,
        .target         = masquerade_tg6,
        .targetsize     = sizeof(struct nf_nat_range),
        .table          = "nat",
index 7a8d1500d374b4089e623ed2b20d68110cff498e..0458b761f3c56ce765841e0a3a7e5e78f90b95eb 100644 (file)
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        }
 
        rt->dst.flags |= DST_HOST;
+       rt->dst.input = ip6_input;
        rt->dst.output  = ip6_output;
        rt->rt6i_gateway  = fl6->daddr;
        rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (!ipv6_addr_any(&fl6.saddr))
                        flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-               if (!fibmatch)
-                       dst = ip6_route_input_lookup(net, dev, &fl6, flags);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 
                rcu_read_unlock();
        } else {
                fl6.flowi6_oif = oif;
 
-               if (!fibmatch)
-                       dst = ip6_route_output(net, NULL, &fl6);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_output(net, NULL, &fl6);
        }
 
 
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                goto errout;
        }
 
+       if (fibmatch && rt->dst.from) {
+               struct rt6_info *ort = container_of(rt->dst.from,
+                                                   struct rt6_info, dst);
+
+               dst_hold(&ort->dst);
+               ip6_rt_put(rt);
+               rt = ort;
+       }
+
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb) {
                ip6_rt_put(rt);
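The route.c hunk changes RTM_GETROUTE with the fibmatch flag to do the ordinary input/output lookup first and then, if the result carries a dst.from parent, swap it for that parent, so the reply reflects the FIB entry the lookup actually matched rather than a separately computed one. A tiny sketch of "resolve, then replace a clone by its origin" with invented structures:

#include <stdio.h>

struct route {
    const char *prefix;
    struct route *from;   /* non-NULL when this entry is a clone */
};

/* Callers that want the FIB entry walk back to the origin of a cloned
 * route instead of reporting the clone itself. */
static struct route *fib_match(struct route *rt)
{
    return rt->from ? rt->from : rt;
}

int main(void)
{
    struct route fib_entry = { "2001:db8::/32", NULL };
    struct route clone     = { "2001:db8::1/128", &fib_entry };

    printf("%s\n", fib_match(&clone)->prefix);      /* 2001:db8::/32 */
    printf("%s\n", fib_match(&fib_entry)->prefix);  /* unchanged     */
    return 0;
}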
index 1f04ec0e4a7aa2c11b8ee27cbdd4067b5bcf32e5..7178476b3d2f64f01832fe3292c7dec849ec2265 100644 (file)
@@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
-                       tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+                       tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
                        0, 0);
 }
 
index 167f83b853e6bd391256e15ef99439b792e18cdc..1621b6ab17ba45e63f79e85a42563781b5536dc2 100644 (file)
@@ -291,16 +291,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
        int i;
 
        mutex_lock(&sta->ampdu_mlme.mtx);
-       for (i = 0; i <  IEEE80211_NUM_TIDS; i++) {
+       for (i = 0; i <  IEEE80211_NUM_TIDS; i++)
                ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
                                                WLAN_REASON_QSTA_LEAVE_QBSS,
                                                reason != AGG_STOP_DESTROY_STA &&
                                                reason != AGG_STOP_PEER_REQUEST);
-       }
-       mutex_unlock(&sta->ampdu_mlme.mtx);
 
        for (i = 0; i <  IEEE80211_NUM_TIDS; i++)
                ___ieee80211_stop_tx_ba_session(sta, i, reason);
+       mutex_unlock(&sta->ampdu_mlme.mtx);
 
        /* stopping might queue the work again - so cancel only afterwards */
        cancel_work_sync(&sta->ampdu_mlme.work);
index cf1bf2605c1027207a86889f93da667d8b2313b9..dc6347342e34c499eaef5403f63034b137ad14e3 100644 (file)
@@ -103,7 +103,6 @@ struct bitstr {
 #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
 #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
 #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
-#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
 static unsigned int get_len(struct bitstr *bs);
 static unsigned int get_bit(struct bitstr *bs);
 static unsigned int get_bits(struct bitstr *bs, unsigned int b);
@@ -165,6 +164,19 @@ static unsigned int get_len(struct bitstr *bs)
        return v;
 }
 
+static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
+{
+       bits += bs->bit;
+       bytes += bits / BITS_PER_BYTE;
+       if (bits % BITS_PER_BYTE > 0)
+               bytes++;
+
+       if (*bs->cur + bytes > *bs->end)
+               return 1;
+
+       return 0;
+}
+
 /****************************************************************************/
 static unsigned int get_bit(struct bitstr *bs)
 {
@@ -279,8 +291,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f,
        PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
 
        INC_BIT(bs);
-
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -293,11 +305,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f,
        PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
 
        BYTE_ALIGN(bs);
-       CHECK_BOUND(bs, 1);
+       if (nf_h323_error_boundary(bs, 1, 0))
+               return H323_ERROR_BOUND;
+
        len = *bs->cur++;
        bs->cur += len;
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
 
-       CHECK_BOUND(bs, 0);
        return H323_ERROR_NONE;
 }
 
@@ -319,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
                bs->cur += 2;
                break;
        case CONS:              /* 64K < Range < 4G */
+               if (nf_h323_error_boundary(bs, 0, 2))
+                       return H323_ERROR_BOUND;
                len = get_bits(bs, 2) + 1;
                BYTE_ALIGN(bs);
                if (base && (f->attr & DECODE)) {       /* timeToLive */
@@ -330,7 +347,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
                break;
        case UNCO:
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                len = get_len(bs);
                bs->cur += len;
                break;
@@ -341,7 +359,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f,
 
        PRINT("\n");
 
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -357,7 +376,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f,
                INC_BITS(bs, f->sz);
        }
 
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -375,12 +395,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
                len = f->lb;
                break;
        case WORD:              /* 2-byte length */
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                len = (*bs->cur++) << 8;
                len += (*bs->cur++) + f->lb;
                break;
        case SEMI:
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                len = get_len(bs);
                break;
        default:
@@ -391,7 +413,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
        bs->cur += len >> 3;
        bs->bit = len & 7;
 
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -404,12 +427,15 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f,
        PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
 
        /* 2 <= Range <= 255 */
+       if (nf_h323_error_boundary(bs, 0, f->sz))
+               return H323_ERROR_BOUND;
        len = get_bits(bs, f->sz) + f->lb;
 
        BYTE_ALIGN(bs);
        INC_BITS(bs, (len << 2));
 
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -440,15 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
                break;
        case BYTE:              /* Range == 256 */
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 1);
+               if (nf_h323_error_boundary(bs, 1, 0))
+                       return H323_ERROR_BOUND;
                len = (*bs->cur++) + f->lb;
                break;
        case SEMI:
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                len = get_len(bs) + f->lb;
                break;
        default:                /* 2 <= Range <= 255 */
+               if (nf_h323_error_boundary(bs, 0, f->sz))
+                       return H323_ERROR_BOUND;
                len = get_bits(bs, f->sz) + f->lb;
                BYTE_ALIGN(bs);
                break;
@@ -458,7 +488,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f,
 
        PRINT("\n");
 
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -473,10 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
        switch (f->sz) {
        case BYTE:              /* Range == 256 */
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 1);
+               if (nf_h323_error_boundary(bs, 1, 0))
+                       return H323_ERROR_BOUND;
                len = (*bs->cur++) + f->lb;
                break;
        default:                /* 2 <= Range <= 255 */
+               if (nf_h323_error_boundary(bs, 0, f->sz))
+                       return H323_ERROR_BOUND;
                len = get_bits(bs, f->sz) + f->lb;
                BYTE_ALIGN(bs);
                break;
@@ -484,7 +518,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
 
        bs->cur += len << 1;
 
-       CHECK_BOUND(bs, 0);
+       if (nf_h323_error_boundary(bs, 0, 0))
+               return H323_ERROR_BOUND;
        return H323_ERROR_NONE;
 }
 
@@ -503,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
        base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
 
        /* Extensible? */
+       if (nf_h323_error_boundary(bs, 0, 1))
+               return H323_ERROR_BOUND;
        ext = (f->attr & EXT) ? get_bit(bs) : 0;
 
        /* Get fields bitmap */
+       if (nf_h323_error_boundary(bs, 0, f->sz))
+               return H323_ERROR_BOUND;
        bmp = get_bitmap(bs, f->sz);
        if (base)
                *(unsigned int *)base = bmp;
@@ -525,9 +564,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
 
                /* Decode */
                if (son->attr & OPEN) { /* Open field */
-                       CHECK_BOUND(bs, 2);
+                       if (nf_h323_error_boundary(bs, 2, 0))
+                               return H323_ERROR_BOUND;
                        len = get_len(bs);
-                       CHECK_BOUND(bs, len);
+                       if (nf_h323_error_boundary(bs, len, 0))
+                               return H323_ERROR_BOUND;
                        if (!base || !(son->attr & DECODE)) {
                                PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
                                      " ", son->name);
@@ -555,8 +596,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
                return H323_ERROR_NONE;
 
        /* Get the extension bitmap */
+       if (nf_h323_error_boundary(bs, 0, 7))
+               return H323_ERROR_BOUND;
        bmp2_len = get_bits(bs, 7) + 1;
-       CHECK_BOUND(bs, (bmp2_len + 7) >> 3);
+       if (nf_h323_error_boundary(bs, 0, bmp2_len))
+               return H323_ERROR_BOUND;
        bmp2 = get_bitmap(bs, bmp2_len);
        bmp |= bmp2 >> f->sz;
        if (base)
@@ -567,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
        for (opt = 0; opt < bmp2_len; opt++, i++, son++) {
                /* Check Range */
                if (i >= f->ub) {       /* Newer Version? */
-                       CHECK_BOUND(bs, 2);
+                       if (nf_h323_error_boundary(bs, 2, 0))
+                               return H323_ERROR_BOUND;
                        len = get_len(bs);
-                       CHECK_BOUND(bs, len);
+                       if (nf_h323_error_boundary(bs, len, 0))
+                               return H323_ERROR_BOUND;
                        bs->cur += len;
                        continue;
                }
@@ -583,9 +629,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
                if (!((0x80000000 >> opt) & bmp2))      /* Not present */
                        continue;
 
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                len = get_len(bs);
-               CHECK_BOUND(bs, len);
+               if (nf_h323_error_boundary(bs, len, 0))
+                       return H323_ERROR_BOUND;
                if (!base || !(son->attr & DECODE)) {
                        PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
                              son->name);
@@ -623,22 +671,27 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
        switch (f->sz) {
        case BYTE:
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 1);
+               if (nf_h323_error_boundary(bs, 1, 0))
+                       return H323_ERROR_BOUND;
                count = *bs->cur++;
                break;
        case WORD:
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                count = *bs->cur++;
                count <<= 8;
                count += *bs->cur++;
                break;
        case SEMI:
                BYTE_ALIGN(bs);
-               CHECK_BOUND(bs, 2);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                count = get_len(bs);
                break;
        default:
+               if (nf_h323_error_boundary(bs, 0, f->sz))
+                       return H323_ERROR_BOUND;
                count = get_bits(bs, f->sz);
                break;
        }
@@ -658,8 +711,11 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f,
        for (i = 0; i < count; i++) {
                if (son->attr & OPEN) {
                        BYTE_ALIGN(bs);
+                       if (nf_h323_error_boundary(bs, 2, 0))
+                               return H323_ERROR_BOUND;
                        len = get_len(bs);
-                       CHECK_BOUND(bs, len);
+                       if (nf_h323_error_boundary(bs, len, 0))
+                               return H323_ERROR_BOUND;
                        if (!base || !(son->attr & DECODE)) {
                                PRINT("%*.s%s\n", (level + 1) * TAB_SIZE,
                                      " ", son->name);
@@ -710,11 +766,17 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
        base = (base && (f->attr & DECODE)) ? base + f->offset : NULL;
 
        /* Decode the choice index number */
+       if (nf_h323_error_boundary(bs, 0, 1))
+               return H323_ERROR_BOUND;
        if ((f->attr & EXT) && get_bit(bs)) {
                ext = 1;
+               if (nf_h323_error_boundary(bs, 0, 7))
+                       return H323_ERROR_BOUND;
                type = get_bits(bs, 7) + f->lb;
        } else {
                ext = 0;
+               if (nf_h323_error_boundary(bs, 0, f->sz))
+                       return H323_ERROR_BOUND;
                type = get_bits(bs, f->sz);
                if (type >= f->lb)
                        return H323_ERROR_RANGE;
@@ -727,8 +789,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
        /* Check Range */
        if (type >= f->ub) {    /* Newer version? */
                BYTE_ALIGN(bs);
+               if (nf_h323_error_boundary(bs, 2, 0))
+                       return H323_ERROR_BOUND;
                len = get_len(bs);
-               CHECK_BOUND(bs, len);
+               if (nf_h323_error_boundary(bs, len, 0))
+                       return H323_ERROR_BOUND;
                bs->cur += len;
                return H323_ERROR_NONE;
        }
@@ -742,8 +807,11 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f,
 
        if (ext || (son->attr & OPEN)) {
                BYTE_ALIGN(bs);
+               if (nf_h323_error_boundary(bs, len, 0))
+                       return H323_ERROR_BOUND;
                len = get_len(bs);
-               CHECK_BOUND(bs, len);
+               if (nf_h323_error_boundary(bs, len, 0))
+                       return H323_ERROR_BOUND;
                if (!base || !(son->attr & DECODE)) {
                        PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ",
                              son->name);
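The nf_conntrack_h323_asn1.c changes replace the byte-only CHECK_BOUND macro with nf_h323_error_boundary(), which folds the current bit offset into the requested bits and bytes before comparing against the end of the buffer, so sub-byte reads (get_bit(), get_bits()) are bounds-checked as well. Below is a self-contained sketch of the same arithmetic; it uses a plain pointer comparison and invented struct/function names rather than the kernel's bitstr.

#include <stdio.h>

#define BITS_PER_BYTE 8

struct bitstream {
    const unsigned char *cur;   /* next byte to read             */
    const unsigned char *end;   /* one past the last valid byte  */
    unsigned int bit;           /* bit offset within *cur (0..7) */
};

/* Return nonzero if reading 'bytes' whole bytes plus 'bits' extra bits,
 * starting at the current bit position, would run past the buffer. */
static int would_overrun(const struct bitstream *bs, size_t bytes, size_t bits)
{
    bits += bs->bit;
    bytes += bits / BITS_PER_BYTE;
    if (bits % BITS_PER_BYTE)
        bytes++;
    return bs->cur + bytes > bs->end;
}

int main(void)
{
    unsigned char buf[4] = { 0 };
    struct bitstream bs = { .cur = buf, .end = buf + sizeof(buf), .bit = 6 };

    /* 3 more bytes still fit; 4 would overrun because of the pending 6 bits. */
    printf("%d %d\n", would_overrun(&bs, 3, 0), would_overrun(&bs, 4, 0));
    return 0;
}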
index 59c08997bfdfdb9c16aa7e9cc1d33f62a46a1769..382d49792f428099a1fa78ebc1f50224ba8b7d97 100644 (file)
@@ -45,7 +45,6 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_conntrack_labels.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
@@ -1566,9 +1565,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 static int ctnetlink_change_timeout(struct nf_conn *ct,
                                    const struct nlattr * const cda[])
 {
-       u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
+       u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
 
-       ct->timeout = nfct_time_stamp + timeout * HZ;
+       if (timeout > INT_MAX)
+               timeout = INT_MAX;
+       ct->timeout = nfct_time_stamp + (u32)timeout;
 
        if (test_bit(IPS_DYING_BIT, &ct->status))
                return -ETIME;
@@ -1768,6 +1769,7 @@ ctnetlink_create_conntrack(struct net *net,
        int err = -EINVAL;
        struct nf_conntrack_helper *helper;
        struct nf_conn_tstamp *tstamp;
+       u64 timeout;
 
        ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
        if (IS_ERR(ct))
@@ -1776,7 +1778,10 @@ ctnetlink_create_conntrack(struct net *net,
        if (!cda[CTA_TIMEOUT])
                goto err1;
 
-       ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+       timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
+       if (timeout > INT_MAX)
+               timeout = INT_MAX;
+       ct->timeout = (u32)timeout + nfct_time_stamp;
 
        rcu_read_lock();
        if (cda[CTA_HELP]) {
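Both ctnetlink hunks widen the netlink-supplied timeout to u64 before multiplying by HZ and clamp the product to INT_MAX, so a large CTA_TIMEOUT value can no longer overflow the 32-bit jiffies arithmetic and yield an already-expired or tiny timeout. A minimal sketch of the clamp; HZ is hard-coded to 250 purely for the example.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define HZ 250   /* example value; the real one is a kernel config choice */

/* Convert a user-supplied timeout in seconds to jiffies without letting
 * the multiplication overflow 32 bits. */
static uint32_t timeout_to_jiffies(uint32_t seconds)
{
    uint64_t timeout = (uint64_t)seconds * HZ;

    if (timeout > INT_MAX)
        timeout = INT_MAX;
    return (uint32_t)timeout;
}

int main(void)
{
    printf("%u %u\n", timeout_to_jiffies(60), timeout_to_jiffies(UINT32_MAX));
    return 0;
}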
index b12fc07111d0847b014410291df947bddc32d46a..37ef35b861f24365c843a4eec5ecc5ad8292cd22 100644 (file)
@@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct,
                 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
                 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
                timeout = timeouts[TCP_CONNTRACK_UNACK];
+       else if (ct->proto.tcp.last_win == 0 &&
+                timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
+               timeout = timeouts[TCP_CONNTRACK_RETRANS];
        else
                timeout = timeouts[new_state];
        spin_unlock_bh(&ct->lock);
index d8327b43e4dce64593573178a397b160488a6355..10798b35748180746266aa0a84187dfd7f0f1ceb 100644 (file)
@@ -5847,6 +5847,12 @@ static int __net_init nf_tables_init_net(struct net *net)
        return 0;
 }
 
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+       WARN_ON_ONCE(!list_empty(&net->nft.af_info));
+       WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
 int __nft_release_basechain(struct nft_ctx *ctx)
 {
        struct nft_rule *rule, *nr;
@@ -5917,6 +5923,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
 
 static struct pernet_operations nf_tables_net_ops = {
        .init   = nf_tables_init_net,
+       .exit   = nf_tables_exit_net,
 };
 
 static int __init nf_tables_module_init(void)
index 41628b3936731b77885717c27cf4dfa8e62b3c0f..d33ce6d5ebce92db2fab30cb4286c11ffd8c321a 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/errno.h>
+#include <linux/capability.h>
 #include <net/netlink.h>
 #include <net/sock.h>
 
@@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
        struct nfnl_cthelper *nlcth;
        int ret = 0;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
                return -EINVAL;
 
@@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
        struct nfnl_cthelper *nlcth;
        bool tuple_set = false;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
                struct netlink_dump_control c = {
                        .dump = nfnl_cthelper_dump_table,
@@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
        struct nfnl_cthelper *nlcth, *n;
        int j = 0, ret;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (tb[NFCTH_NAME])
                helper_name = nla_data(tb[NFCTH_NAME]);
 
index e5afab86381ca1f1487195009ffde4b8eaeec3e2..e955bec0acc6a949a32ac8d1e6ea383b31983c73 100644 (file)
@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net)
 
 static void __net_exit nfnl_log_net_exit(struct net *net)
 {
+       struct nfnl_log_net *log = nfnl_log_pernet(net);
+       unsigned int i;
+
 #ifdef CONFIG_PROC_FS
        remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
 #endif
        nf_log_unset(net, &nfulnl_logger);
+       for (i = 0; i < INSTANCE_BUCKETS; i++)
+               WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
 }
 
 static struct pernet_operations nfnl_log_net_ops = {
index a16356cacec3646a9b70a0d0b443db28a696a0a0..c09b36755ed721f45be12523c4c328c97fd0e166 100644 (file)
@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net)
 
 static void __net_exit nfnl_queue_net_exit(struct net *net)
 {
+       struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+       unsigned int i;
+
        nf_unregister_queue_handler(net);
 #ifdef CONFIG_PROC_FS
        remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
 #endif
+       for (i = 0; i < INSTANCE_BUCKETS; i++)
+               WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
 }
 
 static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
index a0a93d987a3bd440dc19bafe9e65b023c6baf217..47ec1046ad11536e337f709d1f41a267a77cf1d3 100644 (file)
@@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
        [NFTA_EXTHDR_OFFSET]            = { .type = NLA_U32 },
        [NFTA_EXTHDR_LEN]               = { .type = NLA_U32 },
        [NFTA_EXTHDR_FLAGS]             = { .type = NLA_U32 },
+       [NFTA_EXTHDR_OP]                = { .type = NLA_U32 },
+       [NFTA_EXTHDR_SREG]              = { .type = NLA_U32 },
 };
 
 static int nft_exthdr_init(const struct nft_ctx *ctx,
index a77dd514297c9627d6103dbcb6428bb6bdd165ad..55802e97f906d1987ed78b4296584deb38e5f876 100644 (file)
@@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net)
        return 0;
 }
 
+static void __net_exit xt_net_exit(struct net *net)
+{
+       int i;
+
+       for (i = 0; i < NFPROTO_NUMPROTO; i++)
+               WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+}
+
 static struct pernet_operations xt_net_ops = {
        .init = xt_net_init,
+       .exit = xt_net_exit,
 };
 
 static int __init xt_init(void)
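nf_tables_api.c, nfnetlink_log.c, nfnetlink_queue.c and x_tables.c all gain the same shape of pernet .exit handler: when a network namespace is torn down they assert with WARN_ON_ONCE that their per-namespace lists and hash buckets are already empty, turning silent leaks into a one-time warning. A sketch of the pattern with invented structures (a fprintf stands in for the kernel's WARN_ON_ONCE):

#include <stdio.h>

struct list_node { struct list_node *next; };

struct pernet_state {
    struct list_node *tables;     /* per-namespace registrations */
};

/* Called when the namespace goes away: everything registered in it
 * should already have been unregistered, so only check and warn. */
static void pernet_exit(struct pernet_state *state)
{
    if (state->tables)
        fprintf(stderr, "warning: per-netns table list not empty at exit\n");
}

int main(void)
{
    struct pernet_state clean = { .tables = NULL };
    struct list_node leaked_entry = { NULL };
    struct pernet_state leaky = { .tables = &leaked_entry };

    pernet_exit(&clean);   /* silent */
    pernet_exit(&leaky);   /* warns */
    return 0;
}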
index 041da0d9c06f2b1c2ecb31851932ac5a350122a9..1f7fbd3c7e5a6de0cbe7e0bb5c847a98df371aff 100644 (file)
@@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len,
 {
        struct sock_fprog_kern program;
 
+       if (len > XT_BPF_MAX_NUM_INSTR)
+               return -EINVAL;
+
        program.len = len;
        program.filter = insns;
 
@@ -55,6 +58,9 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
        mm_segment_t oldfs = get_fs();
        int retval, fd;
 
+       if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
+               return -EINVAL;
+
        set_fs(KERNEL_DS);
        fd = bpf_obj_get_user(path, 0);
        set_fs(oldfs);
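xt_bpf.c now rejects oversized inputs up front: a classic BPF program longer than XT_BPF_MAX_NUM_INSTR and a pinned-object path that fills the whole XT_BPF_PATH_MAX buffer (i.e. is not NUL-terminated within it) both fail with -EINVAL before any further processing. A userspace sketch of the two checks; the limit values here are placeholders, not necessarily the kernel's.

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_NUM_INSTR 64     /* placeholder for XT_BPF_MAX_NUM_INSTR */
#define PATH_MAX_LEN  512    /* placeholder for XT_BPF_PATH_MAX      */

static int check_bytecode_len(unsigned short len)
{
    if (len > MAX_NUM_INSTR)
        return -EINVAL;      /* refuse oversized programs up front */
    return 0;
}

static int check_pinned_path(const char *path)
{
    /* If no NUL appears within the buffer, the string was truncated
     * (or never terminated) and must not be used. */
    if (strnlen(path, PATH_MAX_LEN) == PATH_MAX_LEN)
        return -EINVAL;
    return 0;
}

int main(void)
{
    printf("%d %d\n", check_bytecode_len(4), check_bytecode_len(1000));
    printf("%d\n", check_pinned_path("/sys/fs/bpf/my_prog"));
    return 0;
}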
index 36e14b1f061ddf7eb77327a3e717e15b6af83bfb..a34f314a8c2380e6b6a223dd6d38dcc88ca2c1ac 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 
+#include <linux/capability.h>
 #include <linux/if.h>
 #include <linux/inetdevice.h>
 #include <linux/ip.h>
@@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
        struct xt_osf_finger *kf = NULL, *sf;
        int err = 0;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (!osf_attrs[OSF_ATTR_FINGER])
                return -EINVAL;
 
@@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
        struct xt_osf_finger *sf;
        int err = -ENOENT;
 
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
        if (!osf_attrs[OSF_ATTR_FINGER])
                return -EINVAL;
 
index b9e0ee4e22f57066d0ac0bc5f64181fbb65e6acc..79cc1bf36e4af7d2c70575e56203a482ba2dca97 100644 (file)
@@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
        struct sock *sk = skb->sk;
        int ret = -ENOMEM;
 
+       if (!net_eq(dev_net(dev), sock_net(sk)))
+               return 0;
+
        dev_hold(dev);
 
        if (is_vmalloc_addr(skb->head))
index dbe2379329c5517fb164b6024d40fabebe7855c8..f039064ce922f3aac8419dcda65ad875f89e966b 100644 (file)
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                        return -EINVAL;
 
                skb_reset_network_header(skb);
+               key->eth.type = skb->protocol;
        } else {
                eth = eth_hdr(skb);
                ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                if (unlikely(parse_vlan(skb, key)))
                        return -ENOMEM;
 
-               skb->protocol = parse_ethertype(skb);
-               if (unlikely(skb->protocol == htons(0)))
+               key->eth.type = parse_ethertype(skb);
+               if (unlikely(key->eth.type == htons(0)))
                        return -ENOMEM;
 
+               /* Multiple tagged packets need to retain TPID to satisfy
+                * skb_vlan_pop(), which will later shift the ethertype into
+                * skb->protocol.
+                */
+               if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+                       skb->protocol = key->eth.cvlan.tpid;
+               else
+                       skb->protocol = key->eth.type;
+
                skb_reset_network_header(skb);
                __skb_push(skb, skb->data - skb_mac_header(skb));
        }
        skb_reset_mac_len(skb);
-       key->eth.type = skb->protocol;
 
        /* Network layer. */
        if (key->eth.type == htons(ETH_P_IP)) {
index 1e3f10e5da996a868b8315c53cfac964842bbec3..6445184b2759a783f05a48578af330e0872f5d11 100644 (file)
@@ -22,7 +22,6 @@
 #include <net/pkt_sched.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
 
 static int skbmark_encode(struct sk_buff *skb, void *skbdata,
                          struct tcf_meta_info *e)
index 2ea1f26c9e966b26076f48757c5351287d9f0943..7221437ca3a6fadad5ebc3f286cc1fc5d35d89e1 100644 (file)
@@ -22,7 +22,6 @@
 #include <net/pkt_sched.h>
 #include <uapi/linux/tc_act/tc_ife.h>
 #include <net/tc_act/tc_ife.h>
-#include <linux/rtnetlink.h>
 
 static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
                             struct tcf_meta_info *e)
index ddcf04b4ab43732c001869f70d63ea193768ebc3..b91ea03e3afa717225c00a3d2a03e9d722229fbc 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
-#include <linux/err.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -352,6 +351,8 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
        struct tcf_chain *chain;
 
+       if (!block)
+               return;
        /* Hold a refcnt for all chains, except 0, so that they don't disappear
         * while we are iterating.
         */
@@ -378,8 +379,6 @@ void tcf_block_put(struct tcf_block *block)
 {
        struct tcf_block_ext_info ei = {0, };
 
-       if (!block)
-               return;
        tcf_block_put_ext(block, block->q, &ei);
 }
 
index 6fe798c2df1a5303cd61cd3ad53cd2f9385d16de..8d78e7f4ecc33082517aaab5767a30c119f49dc0 100644 (file)
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
        struct list_head link;
        struct tcf_result res;
        bool exts_integrated;
-       bool offloaded;
        u32 gen_flags;
        struct tcf_exts exts;
        u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-                              enum tc_clsbpf_command cmd)
+                              struct cls_bpf_prog *oldprog)
 {
-       bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
        struct tcf_block *block = tp->chain->block;
-       bool skip_sw = tc_skip_sw(prog->gen_flags);
        struct tc_cls_bpf_offload cls_bpf = {};
+       struct cls_bpf_prog *obj;
+       bool skip_sw;
        int err;
 
+       skip_sw = prog && tc_skip_sw(prog->gen_flags);
+       obj = prog ?: oldprog;
+
        tc_cls_common_offload_init(&cls_bpf.common, tp);
-       cls_bpf.command = cmd;
-       cls_bpf.exts = &prog->exts;
-       cls_bpf.prog = prog->filter;
-       cls_bpf.name = prog->bpf_name;
-       cls_bpf.exts_integrated = prog->exts_integrated;
-       cls_bpf.gen_flags = prog->gen_flags;
+       cls_bpf.command = TC_CLSBPF_OFFLOAD;
+       cls_bpf.exts = &obj->exts;
+       cls_bpf.prog = prog ? prog->filter : NULL;
+       cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+       cls_bpf.name = obj->bpf_name;
+       cls_bpf.exts_integrated = obj->exts_integrated;
+       cls_bpf.gen_flags = obj->gen_flags;
 
        err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-       if (addorrep) {
+       if (prog) {
                if (err < 0) {
-                       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+                       cls_bpf_offload_cmd(tp, oldprog, prog);
                        return err;
                } else if (err > 0) {
                        prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
                }
        }
 
-       if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+       if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
                return -EINVAL;
 
        return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                           struct cls_bpf_prog *oldprog)
 {
-       struct cls_bpf_prog *obj = prog;
-       enum tc_clsbpf_command cmd;
-       bool skip_sw;
-       int ret;
-
-       skip_sw = tc_skip_sw(prog->gen_flags) ||
-               (oldprog && tc_skip_sw(oldprog->gen_flags));
-
-       if (oldprog && oldprog->offloaded) {
-               if (!tc_skip_hw(prog->gen_flags)) {
-                       cmd = TC_CLSBPF_REPLACE;
-               } else if (!tc_skip_sw(prog->gen_flags)) {
-                       obj = oldprog;
-                       cmd = TC_CLSBPF_DESTROY;
-               } else {
-                       return -EINVAL;
-               }
-       } else {
-               if (tc_skip_hw(prog->gen_flags))
-                       return skip_sw ? -EINVAL : 0;
-               cmd = TC_CLSBPF_ADD;
-       }
-
-       ret = cls_bpf_offload_cmd(tp, obj, cmd);
-       if (ret)
-               return ret;
+       if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+               return -EINVAL;
 
-       obj->offloaded = true;
-       if (oldprog)
-               oldprog->offloaded = false;
+       if (prog && tc_skip_hw(prog->gen_flags))
+               prog = NULL;
+       if (oldprog && tc_skip_hw(oldprog->gen_flags))
+               oldprog = NULL;
+       if (!prog && !oldprog)
+               return 0;
 
-       return 0;
+       return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
        int err;
 
-       if (!prog->offloaded)
-               return;
-
-       err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-       if (err) {
+       err = cls_bpf_offload_cmd(tp, NULL, prog);
+       if (err)
                pr_err("Stopping hardware offload failed: %d\n", err);
-               return;
-       }
-
-       prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
                                         struct cls_bpf_prog *prog)
 {
-       if (!prog->offloaded)
-               return;
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_bpf_offload cls_bpf = {};
+
+       tc_cls_common_offload_init(&cls_bpf.common, tp);
+       cls_bpf.command = TC_CLSBPF_STATS;
+       cls_bpf.exts = &prog->exts;
+       cls_bpf.prog = prog->filter;
+       cls_bpf.name = prog->bpf_name;
+       cls_bpf.exts_integrated = prog->exts_integrated;
+       cls_bpf.gen_flags = prog->gen_flags;
 
-       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+       tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
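The cls_bpf.c rework collapses the separate add/replace/destroy offload commands into one call that passes both the new and the old program: both pointers mean replace, only the new one means add, only the old one means destroy, and drivers see a single TC_CLSBPF_OFFLOAD command. A hedged sketch of dispatching on the (new, old) pair; the enum and names below are illustrative, not the driver API.

#include <stdio.h>

struct prog { const char *name; };

enum action { OFFLOAD_ADD, OFFLOAD_REPLACE, OFFLOAD_DESTROY, OFFLOAD_NONE };

/* One entry point instead of three commands: the combination of the new
 * and old program pointers encodes what the driver should do. */
static enum action offload_cmd(const struct prog *new_prog,
                               const struct prog *old_prog)
{
    if (new_prog && old_prog)
        return OFFLOAD_REPLACE;
    if (new_prog)
        return OFFLOAD_ADD;
    if (old_prog)
        return OFFLOAD_DESTROY;
    return OFFLOAD_NONE;
}

int main(void)
{
    struct prog a = { "a" }, b = { "b" };

    printf("%d %d %d\n",
           offload_cmd(&a, NULL),   /* add     */
           offload_cmd(&b, &a),     /* replace */
           offload_cmd(NULL, &b));  /* destroy */
    return 0;
}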
index ac152b4f4247d61d1761886352a93cb03b585cbf..507859cdd1cb1e7d97751ebad5cd688cb02b14ea 100644 (file)
@@ -45,7 +45,6 @@
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
-#include <linux/netdevice.h>
 #include <linux/idr.h>
 
 struct tc_u_knode {
index b6c4f536876b70b0ad24fee686129a396e07f573..0f1eab99ff4edb6e7e27f4b4b34552b5ee996cbf 100644 (file)
@@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
        tcm->tcm_info = refcount_read(&q->refcnt);
        if (nla_put_string(skb, TCA_KIND, q->ops->id))
                goto nla_put_failure;
+       if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
+               goto nla_put_failure;
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
                goto nla_put_failure;
        qlen = q->q.qlen;
index 5ecc38f35d4774fdfa402d9a4c4a0e655e1c91c2..fc1286f499c1462ab29c5054f734237788974e0e 100644 (file)
@@ -68,6 +68,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
        struct net_device *dev = qdisc_dev(sch);
        int err;
 
+       net_inc_ingress_queue();
+
        mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
 
        q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -78,7 +80,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
        if (err)
                return err;
 
-       net_inc_ingress_queue();
        sch->flags |= TCQ_F_CPUSTATS;
 
        return 0;
@@ -172,6 +173,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
        struct net_device *dev = qdisc_dev(sch);
        int err;
 
+       net_inc_ingress_queue();
+       net_inc_egress_queue();
+
        mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
 
        q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
@@ -190,18 +194,11 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 
        err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
        if (err)
-               goto err_egress_block_get;
-
-       net_inc_ingress_queue();
-       net_inc_egress_queue();
+               return err;
 
        sch->flags |= TCQ_F_CPUSTATS;
 
        return 0;
-
-err_egress_block_get:
-       tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
-       return err;
 }
 
 static void clsact_destroy(struct Qdisc *sch)
index 9d874e60e0323dee6bb7410b0ec34186eaac19d7..f0747eb87dc4784e67e0b5872dcf37effaaa4060 100644 (file)
@@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
                .handle = sch->handle,
                .parent = sch->parent,
        };
+       int err;
 
        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;
@@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable)
                opt.command = TC_RED_DESTROY;
        }
 
-       return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+       err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+
+       if (!err && enable)
+               sch->flags |= TCQ_F_OFFLOADED;
+       else
+               sch->flags &= ~TCQ_F_OFFLOADED;
+
+       return err;
 }
 
 static void red_destroy(struct Qdisc *sch)
@@ -274,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
        return red_change(sch, opt);
 }
 
-static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
 {
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload hw_stats = {
@@ -286,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
                        .stats.qstats = &sch->qstats,
                },
        };
-       int err;
 
-       opt->flags &= ~TC_RED_OFFLOADED;
-       if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-               return 0;
-
-       err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
-                                           &hw_stats);
-       if (err == -EOPNOTSUPP)
+       if (!(sch->flags & TCQ_F_OFFLOADED))
                return 0;
 
-       if (!err)
-               opt->flags |= TC_RED_OFFLOADED;
-
-       return err;
+       return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+                                            &hw_stats);
 }
 
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -319,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
        int err;
 
        sch->qstats.backlog = q->qdisc->qstats.backlog;
-       err = red_dump_offload(sch, &opt);
+       err = red_dump_offload_stats(sch, &opt);
        if (err)
                goto nla_put_failure;
 
@@ -347,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
                .marked = q->stats.prob_mark + q->stats.forced_mark,
        };
 
-       if (tc_can_offload(dev) &&  dev->netdev_ops->ndo_setup_tc) {
+       if (sch->flags & TCQ_F_OFFLOADED) {
                struct red_stats hw_stats = {0};
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
index 3f619fdcbf0a0b4a6f35ece8021c011f874a2d79..291c97b07058218635fcfcd06214aa79d74ec80d 100644 (file)
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
        case SCTP_CID_AUTH:
                return "AUTH";
 
+       case SCTP_CID_RECONF:
+               return "RECONF";
+
        default:
                break;
        }
index eb17a911aa29717ac0db25cf0662b8e24a420655..3253f724a995256084dcb1f6610de3384b475e79 100644 (file)
@@ -3891,13 +3891,17 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
        struct sctp_association *asoc;
        int retval = -EINVAL;
 
-       if (optlen < sizeof(struct sctp_reset_streams))
+       if (optlen < sizeof(*params))
                return -EINVAL;
 
        params = memdup_user(optval, optlen);
        if (IS_ERR(params))
                return PTR_ERR(params);
 
+       if (params->srs_number_streams * sizeof(__u16) >
+           optlen - sizeof(*params))
+               goto out;
+
        asoc = sctp_id2assoc(sk, params->srs_assoc_id);
        if (!asoc)
                goto out;
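The sctp_setsockopt_reset_streams() hunk validates the variable-length part of the request: after copying the header it checks that srs_number_streams 16-bit stream ids actually fit inside the remaining optlen, so a short buffer can no longer cause reads past the copied allocation. A sketch of the same length check; the struct layout is invented for the example.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct reset_streams_req {
    uint32_t assoc_id;
    uint16_t number_streams;
    /* number_streams uint16_t stream ids follow in the caller's buffer */
};

/* optlen is the total number of bytes the caller handed in. */
static int validate_reset_streams(const struct reset_streams_req *req,
                                  size_t optlen)
{
    if (optlen < sizeof(*req))
        return -EINVAL;

    /* The trailing ids must fit in what remains after the fixed header. */
    if ((size_t)req->number_streams * sizeof(uint16_t) >
        optlen - sizeof(*req))
        return -EINVAL;

    return 0;
}

int main(void)
{
    struct reset_streams_req req = { .assoc_id = 1, .number_streams = 2 };
    size_t good = sizeof(req) + 2 * sizeof(uint16_t);

    printf("%d %d\n",
           validate_reset_streams(&req, good),         /* ids fit      */
           validate_reset_streams(&req, sizeof(req))); /* buffer short */
    return 0;
}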
index a71be33f3afeb0aaaef174ee082c4c547aab1e2d..e36ec5dd64c6ff969fc30aae893d1d5ca8c221bf 100644 (file)
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
                      gfp_t gfp)
 {
-       struct sctp_association *asoc;
-       __u16 needed, freed;
-
-       asoc = ulpq->asoc;
+       struct sctp_association *asoc = ulpq->asoc;
+       __u32 freed = 0;
+       __u16 needed;
 
-       if (chunk) {
-               needed = ntohs(chunk->chunk_hdr->length);
-               needed -= sizeof(struct sctp_data_chunk);
-       } else
-               needed = SCTP_DEFAULT_MAXWINDOW;
-
-       freed = 0;
+       needed = ntohs(chunk->chunk_hdr->length) -
+                sizeof(struct sctp_data_chunk);
 
        if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
                freed = sctp_ulpq_renege_order(ulpq, needed);
-               if (freed < needed) {
+               if (freed < needed)
                        freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
-               }
        }
        /* If able to free enough room, accept this chunk. */
-       if (chunk && (freed >= needed)) {
-               int retval;
-               retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+       if (freed >= needed) {
+               int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
                /*
                 * Enter partial delivery if chunk has not been
                 * delivered; otherwise, drain the reassembly queue.
index c4778cae58ef12c191958261a50e58e5f67082e8..444380f968f1158660f6a01a10cd8223c9db6081 100644 (file)
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
                        goto out_free_groups;
                creds->cr_group_info->gid[i] = kgid;
        }
+       groups_sort(creds->cr_group_info);
 
        return 0;
 out_free_groups:
index 5dd4e6c9fef21f650db78907e0fa46ee09413c71..26531193fce4d07f4b6d513093544b4a760f96ab 100644 (file)
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd,
                                goto out;
                        rsci.cred.cr_group_info->gid[i] = kgid;
                }
+               groups_sort(rsci.cred.cr_group_info);
 
                /* mech name */
                len = qword_get(&mesg, buf, mlen);
index 740b67d5a733bdcd1ad10b6efdf957a8cd9a7889..af7f28fb8102e4313f5ced6aa585e30f3911ca6c 100644 (file)
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd,
                ug.gi->gid[i] = kgid;
        }
 
+       groups_sort(ug.gi);
        ugp = unix_gid_lookup(cd, uid);
        if (ugp) {
                struct cache_head *ch;
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
                kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
                cred->cr_group_info->gid[i] = kgid;
        }
+       groups_sort(cred->cr_group_info);
        if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
                *authp = rpc_autherr_badverf;
                return SVC_DENIED;
index 333b9d697ae5373d00c6001b9c7f75f3d6c0ed91..33b74fd8405185d906d07e315c9b5a83775e747d 100644 (file)
@@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
+       unsigned int connect_cookie;
        int status, numreqs;
 
        dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task)
        } else if (!req->rq_bytes_sent)
                return;
 
+       connect_cookie = xprt->connect_cookie;
        req->rq_xtime = ktime_get();
        status = xprt->ops->send_request(task);
        trace_xprt_transmit(xprt, req->rq_xid, status);
@@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task)
        xprt->stat.bklog_u += xprt->backlog.qlen;
        xprt->stat.sending_u += xprt->sending.qlen;
        xprt->stat.pending_u += xprt->pending.qlen;
+       spin_unlock_bh(&xprt->transport_lock);
 
-       /* Don't race with disconnect */
-       if (!xprt_connected(xprt))
-               task->tk_status = -ENOTCONN;
-       else {
+       req->rq_connect_cookie = connect_cookie;
+       if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
                /*
-                * Sleep on the pending queue since
-                * we're expecting a reply.
+                * Sleep on the pending queue if we're expecting a reply.
+                * The spinlock ensures atomicity between the test of
+                * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
                 */
-               if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task))
+               spin_lock(&xprt->recv_lock);
+               if (!req->rq_reply_bytes_recvd) {
                        rpc_sleep_on(&xprt->pending, task, xprt_timer);
-               req->rq_connect_cookie = xprt->connect_cookie;
+                       /*
+                        * Send an extra queue wakeup call if the
+                        * connection was dropped in case the call to
+                        * rpc_sleep_on() raced.
+                        */
+                       if (!xprt_connected(xprt))
+                               xprt_wake_pending_tasks(xprt, -ENOTCONN);
+               }
+               spin_unlock(&xprt->recv_lock);
        }
-       spin_unlock_bh(&xprt->transport_lock);
 }
 
 static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
index ed34dc0f144cce537fce51dcba5bb12fe0b6df1c..a3f2ab283aeba38b26514dd9eb0e948c71a9ee7e 100644 (file)
@@ -1408,11 +1408,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
        dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
                __func__, rep, req, be32_to_cpu(rep->rr_xid));
 
-       if (list_empty(&req->rl_registered) &&
-           !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
-               rpcrdma_complete_rqst(rep);
-       else
-               queue_work(rpcrdma_receive_wq, &rep->rr_work);
+       queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
        return;
 
 out_badstatus:
index 646c24494ea7eba7fb2a2296ba6339e8dbf8f31e..6ee1ad8978f3b2977de2798d1a76ded1af6f78c4 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/sunrpc/addr.h>
+#include <linux/smp.h>
 
 #include "xprt_rdma.h"
 
@@ -656,6 +657,7 @@ xprt_rdma_allocate(struct rpc_task *task)
                task->tk_pid, __func__, rqst->rq_callsize,
                rqst->rq_rcvsize, req);
 
+       req->rl_cpu = smp_processor_id();
        req->rl_connect_cookie = 0;     /* our reserved value */
        rpcrdma_set_xprtdata(rqst, req);
        rqst->rq_buffer = req->rl_sendbuf->rg_base;
index 710b3f77db82869cd23abb90ea308ca67beef2bf..8607c029c0dd820250f4547c68bda41b7daca313 100644 (file)
@@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void)
        struct workqueue_struct *recv_wq;
 
        recv_wq = alloc_workqueue("xprtrdma_receive",
-                                 WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
+                                 WQ_MEM_RECLAIM | WQ_HIGHPRI,
                                  0);
        if (!recv_wq)
                return -ENOMEM;
index 51686d9eac5f992d9d23d674f94df0e77f58bb72..1342f743f1c41acae0145a49962825aa1574311c 100644 (file)
@@ -342,6 +342,7 @@ enum {
 struct rpcrdma_buffer;
 struct rpcrdma_req {
        struct list_head        rl_list;
+       int                     rl_cpu;
        unsigned int            rl_connect_cookie;
        struct rpcrdma_buffer   *rl_buffer;
        struct rpcrdma_rep      *rl_reply;
index 95fec2c057d6ebdb223e19ef83bf9c383cb2156e..7ebbdeb2a90e1d8429113b81370ad67f05ee633e 100644 (file)
@@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
        if (m->window >= ADV_IDLE)
                return;
 
-       if (!list_empty(&m->congested))
-               return;
+       list_del_init(&m->congested);
 
        /* Sort member into congested members' list */
        list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
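
The hunk above replaces an "already queued, bail out" early return with an unconditional list_del_init(), so the member is always unlinked and then re-sorted into the congested list at its new position. A standalone sketch of why that is safe: after list_del_init() the node points at itself, so unlinking an already-unlinked node is a no-op. The list macros below are simplified stand-ins for the kernel's <linux/list.h>, not the real implementation.

/* Simplified circular doubly-linked list, enough to show that
 * list_del_init() can be called unconditionally before re-inserting. */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

/* Unlink the node and re-point it at itself.  On a node that is already
 * self-linked this just rewrites the same pointers, so it is idempotent. */
static void list_del_init(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	INIT_LIST_HEAD(n);
}

int main(void)
{
	struct list_head congested, member;

	INIT_LIST_HEAD(&congested);
	INIT_LIST_HEAD(&member);

	list_del_init(&member);              /* not on any list: harmless  */
	list_add_tail(&member, &congested);  /* queue it                   */
	list_del_init(&member);              /* unlink before re-sorting   */
	list_add_tail(&member, &congested);  /* re-insert at the new spot  */

	printf("queued: %d\n", congested.next == &member);
	return 0;
}
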
@@ -648,6 +647,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
        } else if (mtyp == GRP_REMIT_MSG) {
                msg_set_grp_remitted(hdr, m->window);
        }
+       msg_set_dest_droppable(hdr, true);
        __skb_queue_tail(xmitq, skb);
 }
 
@@ -689,15 +689,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                        msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                        __skb_queue_tail(inputq, m->event_msg);
                }
-               if (m->window < ADV_IDLE)
-                       tipc_group_update_member(m, 0);
-               else
-                       list_del_init(&m->congested);
+               list_del_init(&m->congested);
+               tipc_group_update_member(m, 0);
                return;
        case GRP_LEAVE_MSG:
                if (!m)
                        return;
                m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+               list_del_init(&m->list);
+               list_del_init(&m->congested);
+               *usr_wakeup = true;
 
                /* Wait until WITHDRAW event is received */
                if (m->state != MBR_LEAVING) {
@@ -709,8 +710,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                ehdr = buf_msg(m->event_msg);
                msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                __skb_queue_tail(inputq, m->event_msg);
-               *usr_wakeup = true;
-               list_del_init(&m->congested);
                return;
        case GRP_ADV_MSG:
                if (!m)
@@ -862,6 +861,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
                                msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
                        __skb_queue_tail(inputq, skb);
                }
+               list_del_init(&m->list);
                list_del_init(&m->congested);
        }
        *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
index 5d18c0caa92b213740e5c6e3152ec8ce37717dc2..41127d0b925ea4d515e7c7bbe6739dee99a442f2 100644 (file)
@@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
                                __skb_dequeue(arrvq);
                                __skb_queue_tail(inputq, skb);
                        }
-                       refcount_dec(&skb->users);
+                       kfree_skb(skb);
                        spin_unlock_bh(&inputq->lock);
                        continue;
                }
index 278d979c211a7e1f3581e2b33895f6ecfd160994..1d84f91bbfb0c8c9087e309821eb687325733358 100644 (file)
@@ -23,19 +23,36 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
 cfg80211-y += extra-certs.o
 endif
 
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
        @$(kecho) "  GEN     $@"
-       @echo '#include "reg.h"' > $@
-       @echo 'const u8 shipped_regdb_certs[] = {' >> $@
-       @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
-       @echo '};' >> $@
-       @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+       @(echo '#include "reg.h"'; \
+         echo 'const u8 shipped_regdb_certs[] = {'; \
+         cat $^ ; \
+         echo '};'; \
+         echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+        ) > $@
 
 $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
                      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
        @$(kecho) "  GEN     $@"
-       @echo '#include "reg.h"' > $@
-       @echo 'const u8 extra_regdb_certs[] = {' >> $@
-       @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
-       @echo '};' >> $@
-       @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
+       @(set -e; \
+         allf=""; \
+         for f in $^ ; do \
+             # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
+             thisf=$$(od -An -v -tx1 < $$f | \
+                          sed -e 's/ /\n/g' | \
+                          sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
+                          sed -e 's/^/0x/;s/$$/,/'); \
+             # file should not be empty - maybe command substitution failed? \
+             test ! -z "$$thisf";\
+             allf=$$allf$$thisf;\
+         done; \
+         ( \
+             echo '#include "reg.h"'; \
+             echo 'const u8 extra_regdb_certs[] = {'; \
+             echo "$$allf"; \
+             echo '};'; \
+             echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
+         ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
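
The Makefile rules above stop shelling out to hexdump at build time: shipped certificates are now pre-converted .hex fragments that are simply concatenated into a C array, and the extra-certs rule uses an od|sed pipeline instead. For reference only, a small standalone C program that produces the same "0xNN," lines from a binary file, equivalent in spirit to the removed hexdump invocation; it is not part of the build.

/* Print a binary file as "0xNN," bytes, one per line, i.e. the output the
 * removed "hexdump -v -e '1/1 \"0x%.2x,\" \"\\n\"'" rule used to emit. */
#include <stdio.h>

int main(int argc, char **argv)
{
	FILE *f;
	int c;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	f = fopen(argv[1], "rb");
	if (!f) {
		perror(argv[1]);
		return 1;
	}
	while ((c = fgetc(f)) != EOF)
		printf("0x%02x,\n", c);
	fclose(f);
	return 0;
}
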
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644 (file)
index 0000000..14ea666
--- /dev/null
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644 (file)
index c6f8f9d..0000000
Binary files a/net/wireless/certs/sforshee.x509 and /dev/null differ
index b1ac23ca20c86be0af71e9a1ba92cc99d8d5a967..213d0c498c97d78b17c81d1fd8b850c8768f7057 100644 (file)
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        case NL80211_IFTYPE_AP:
                if (wdev->ssid_len &&
                    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
        case NL80211_IFTYPE_STATION:
        case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
                if (!ssid_ie)
                        break;
                if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
                }
        default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        genlmsg_end(msg, hdr);
        return 0;
 
+ nla_put_failure_locked:
+       wdev_unlock(wdev);
  nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
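
The new nla_put_failure_locked label sits one step above nla_put_failure, so error paths that still hold the wdev lock drop it before falling through to the common cleanup. A minimal sketch of that layered goto-unwind pattern, with a pthread mutex standing in for wdev_lock()/wdev_unlock(); the function and failure names are illustrative.

/* Layered error labels: the "locked" label releases the lock and then falls
 * through into the ordinary failure path, so each exit undoes exactly what
 * had been set up at the point the error occurred. Compile with -pthread. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;

static int fill_message(bool fail_locked_path, bool fail_unlocked_path)
{
	pthread_mutex_lock(&obj_lock);

	if (fail_locked_path)
		goto fail_locked;            /* error while obj_lock is held */

	pthread_mutex_unlock(&obj_lock);

	if (fail_unlocked_path)
		goto fail;                   /* error after the lock was dropped */

	return 0;

fail_locked:
	pthread_mutex_unlock(&obj_lock);     /* drop the lock first ...          */
fail:
	/* ... then the shared cleanup (the genlmsg_cancel() step above). */
	return -1;
}

int main(void)
{
	printf("%d %d %d\n",
	       fill_message(false, false),
	       fill_message(true,  false),
	       fill_message(false, true));
	return 0;
}
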
index 040aa79e1d9d39c55df7a748565dea3296ec6a6f..31031f10fe56b0b15ec44916ca7996e174fd5090 100755 (executable)
@@ -6233,28 +6233,6 @@ sub process {
                        }
                }
 
-# whine about ACCESS_ONCE
-               if ($^V && $^V ge 5.10.0 &&
-                   $line =~ /\bACCESS_ONCE\s*$balanced_parens\s*(=(?!=))?\s*($FuncArg)?/) {
-                       my $par = $1;
-                       my $eq = $2;
-                       my $fun = $3;
-                       $par =~ s/^\(\s*(.*)\s*\)$/$1/;
-                       if (defined($eq)) {
-                               if (WARN("PREFER_WRITE_ONCE",
-                                        "Prefer WRITE_ONCE(<FOO>, <BAR>) over ACCESS_ONCE(<FOO>) = <BAR>\n" . $herecurr) &&
-                                   $fix) {
-                                       $fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)\s*$eq\s*\Q$fun\E/WRITE_ONCE($par, $fun)/;
-                               }
-                       } else {
-                               if (WARN("PREFER_READ_ONCE",
-                                        "Prefer READ_ONCE(<FOO>) over ACCESS_ONCE(<FOO>)\n" . $herecurr) &&
-                                   $fix) {
-                                       $fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)/READ_ONCE($par)/;
-                               }
-                       }
-               }
-
 # check for mutex_trylock_recursive usage
                if ($line =~ /mutex_trylock_recursive/) {
                        ERROR("LOCKING",
index 39e07d8574dd787c2af71937852156abb9b1a7fb..7721d5b2b0c04ee923b3c14216214db68fcb7dd0 100755 (executable)
 set -o errexit
 set -o nounset
 
-READELF="${CROSS_COMPILE}readelf"
-ADDR2LINE="${CROSS_COMPILE}addr2line"
-SIZE="${CROSS_COMPILE}size"
-NM="${CROSS_COMPILE}nm"
+READELF="${CROSS_COMPILE:-}readelf"
+ADDR2LINE="${CROSS_COMPILE:-}addr2line"
+SIZE="${CROSS_COMPILE:-}size"
+NM="${CROSS_COMPILE:-}nm"
 
 command -v awk >/dev/null 2>&1 || die "awk isn't installed"
 command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed"
index b3b353d7252724e10f23a9288cd24aab3ef34007..f055ca10bbc1d33c9c1cee1fd913b7c930984ac1 100644 (file)
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
        return 0;
 }
 
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+                                    struct snd_rawmidi_info *info)
 {
        struct snd_rawmidi *rmidi;
        struct snd_rawmidi_str *pstr;
        struct snd_rawmidi_substream *substream;
 
-       mutex_lock(&register_mutex);
        rmidi = snd_rawmidi_search(card, info->device);
-       mutex_unlock(&register_mutex);
        if (!rmidi)
                return -ENXIO;
        if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
        }
        return -ENXIO;
 }
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+       int ret;
+
+       mutex_lock(&register_mutex);
+       ret = __snd_rawmidi_info_select(card, info);
+       mutex_unlock(&register_mutex);
+       return ret;
+}
 EXPORT_SYMBOL(snd_rawmidi_info_select);
 
 static int snd_rawmidi_info_select_user(struct snd_card *card,
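
The rawmidi fix moves the whole body into a __ helper and takes register_mutex around the single call, so every early return inside the helper still drops the lock. A generic userspace sketch of the same wrapper pattern, with a pthread mutex standing in for register_mutex; the lookup logic is made up for illustration.

/* Locked wrapper around an unlocked helper: all the early returns live in
 * __select_device(), and select_device() is the only place the mutex is
 * taken and released. Compile with -pthread. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t register_mutex = PTHREAD_MUTEX_INITIALIZER;
static int registered_device = 2;        /* pretend device table */

static int __select_device(int device)
{
	if (device < 0)
		return -22;              /* -EINVAL: no unlock needed here */
	if (device != registered_device)
		return -6;               /* -ENXIO: nor here               */
	return 0;
}

static int select_device(int device)
{
	int ret;

	pthread_mutex_lock(&register_mutex);
	ret = __select_device(device);          /* every path out of the helper */
	pthread_mutex_unlock(&register_mutex);  /* still releases the lock      */
	return ret;
}

int main(void)
{
	printf("%d %d %d\n", select_device(2), select_device(5), select_device(-1));
	return 0;
}
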
index c19c81d230bd7423b4153d2266a45e09333f8714..b4f1b6e88305496f91d028ceb82fe9b8a6a60ccb 100644 (file)
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
                                ((codec)->core.vendor_id == 0x80862800))
+#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
                                || is_skylake(codec) || is_broxton(codec) \
-                               || is_kabylake(codec)) || is_geminilake(codec)
-
+                               || is_kabylake(codec)) || is_geminilake(codec) \
+                               || is_cannonlake(codec)
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",     patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",   patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
index 4b21f71d685c78fd00345b2e229541b493c614f0..6a4db00511ab14593e8a0d33500c547a2e9656ae 100644 (file)
@@ -5185,6 +5185,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
        }
 }
 
+/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
+static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static hda_nid_t preferred_pairs[] = {
+               0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
+               0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       spec->gen.preferred_dacs = preferred_pairs;
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5302,6 +5318,8 @@ enum {
        ALC233_FIXUP_LENOVO_MULTI_CODECS,
        ALC294_FIXUP_LENOVO_MIC_LOCATION,
        ALC700_FIXUP_INTEL_REFERENCE,
+       ALC274_FIXUP_DELL_BIND_DACS,
+       ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6130,21 @@ static const struct hda_fixup alc269_fixups[] = {
                        {}
                }
        },
+       [ALC274_FIXUP_DELL_BIND_DACS] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc274_fixup_bind_dacs,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
+       [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x0401102f },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC274_FIXUP_DELL_BIND_DACS
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6578,7 +6611,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x14, 0x90170110},
                {0x1b, 0x90a70130},
                {0x21, 0x03211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
                {0x12, 0xb7a60130},
                {0x13, 0xb8a61140},
                {0x16, 0x90170110},
index 7c9e361b2200be081aca8f5a99d1b71a5846d30b..2b4ceda36291c01c6cca69d3a1cacd6c23014f40 100644 (file)
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
        kctl->private_value = (unsigned long)namelist;
        kctl->private_free = usb_mixer_selector_elem_free;
 
-       nameid = uac_selector_unit_iSelector(desc);
+       /* check the static mapping table at first */
        len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
-       if (len)
-               ;
-       else if (nameid)
-               len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
-                                        sizeof(kctl->id.name));
-       else
-               len = get_term_name(state, &state->oterm,
-                                   kctl->id.name, sizeof(kctl->id.name), 0);
-
        if (!len) {
-               strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+               /* no mapping ? */
+               /* if iSelector is given, use it */
+               nameid = uac_selector_unit_iSelector(desc);
+               if (nameid)
+                       len = snd_usb_copy_string_desc(state, nameid,
+                                                      kctl->id.name,
+                                                      sizeof(kctl->id.name));
+               /* ... or pick up the terminal name at next */
+               if (!len)
+                       len = get_term_name(state, &state->oterm,
+                                   kctl->id.name, sizeof(kctl->id.name), 0);
+               /* ... or use the fixed string "USB" as the last resort */
+               if (!len)
+                       strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
 
+               /* and add the proper suffix */
                if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
                        append_ctl_name(kctl, " Clock Source");
                else if ((state->oterm.type & 0xff00) == 0x0100)
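
The reordered selector-unit code above consults the static mapping table first and only then falls back to the iSelector string, the terminal name, and finally the literal "USB". A compact sketch of that kind of fallback chain filling a fixed-size name buffer; the three lookup helpers are dummies for illustration, not ALSA APIs.

/* Fallback chain: each step runs only if the previous one produced nothing,
 * and a fixed default is the last resort, mirroring the reordering above. */
#include <stdio.h>
#include <string.h>

static int lookup_mapped_name(char *buf, int len)   { (void)buf; (void)len; return 0; }
static int lookup_iselector(char *buf, int len)     { return snprintf(buf, len, "Clock Selector"); }
static int lookup_terminal_name(char *buf, int len) { (void)buf; (void)len; return 0; }

int main(void)
{
	char name[44];
	int len;

	len = lookup_mapped_name(name, sizeof(name));            /* mapping table first   */
	if (!len)
		len = lookup_iselector(name, sizeof(name));       /* then descriptor string */
	if (!len)
		len = lookup_terminal_name(name, sizeof(name));   /* then terminal name     */
	if (!len)
		snprintf(name, sizeof(name), "USB");              /* fixed last resort      */

	printf("%s\n", name);
	return 0;
}
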
index 77eecaa4db1f32c9b7af87273c599181bf307443..a66ef5777887a78d7416e64c049c73b26477c7f7 100644 (file)
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
 /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
  * between PCM/DOP and native DSD mode
  */
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
 {
        switch (id) {
        case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+       case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
                return true;
        }
        return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
                        break;
                }
                mdelay(20);
-       } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+       } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
                /* Vendor mode switch cmd is required. */
                switch (fmt->altsetting) {
                case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        }
 
        /* TEAC devices with USB DAC functionality */
-       if (is_teac_50X_dac(chip->usb_id)) {
+       if (is_teac_dsd_dac(chip->usb_id)) {
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
        }
index cefe7c7cd4f6f29fabd90e33495d7a6d8ac6dbdd..0a8e37a519f258e72317f39a87ac9cc60ad47f6f 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
-#include <asm/ptrace.h>
+#include "ptrace.h"
 
 typedef user_pt_regs bpf_user_pt_regs_t;
 
index c0b0e9e8aa66eb645eba71784e80aa93b0f0df79..800104c8a3edfee7f4f52a33b8451a51ee0ed90a 100644 (file)
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO             (13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF             (13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR         (13*32+ 2) /* Always save/restore FP error pointers */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM             (14*32+ 0) /* Digital Thermal Sensor */
index 07fd03c74a775a48a5183f96cb85e27b4f85fedd..04e32f965ad7f038beb2d8db9dc2119e07628744 100644 (file)
@@ -84,8 +84,6 @@
 
 #define uninitialized_var(x) x = *(&(x))
 
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
 #include <linux/types.h>
 
 /*
@@ -135,20 +133,19 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 /*
  * Prevent the compiler from merging or refetching reads or writes. The
  * compiler is also forbidden from reordering successive instances of
- * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
- * compiler is aware of some particular ordering.  One way to make the
- * compiler aware of ordering is to put the two invocations of READ_ONCE,
- * WRITE_ONCE or ACCESS_ONCE() in different C statements.
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
  *
- * In contrast to ACCESS_ONCE these two macros will also work on aggregate
- * data types like structs or unions. If the size of the accessed data
- * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
- * compile-time warning.
+ * These two macros will also work on aggregate data types like structs or
+ * unions. If the size of the accessed data type exceeds the word size of
+ * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
+ * fall back to memcpy and print a compile-time warning.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
- * and (2) Ensuring that the compiler does not  fold, spindle, or otherwise
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
  * mutilate accesses that either do not require ordering or that interact
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
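
The comment being rewritten above concerns READ_ONCE()/WRITE_ONCE(): marked accesses in separate statements are not merged, refetched, or reordered against each other by the compiler, though they provide no CPU-level ordering on their own. A userspace sketch with simplified, scalar-only versions of the macros; these volatile-cast definitions are an approximation of the kernel's (GCC-style __typeof__), good enough to show the usage, not a replacement for it.

/* Simplified scalar READ_ONCE/WRITE_ONCE: the volatile access forces the
 * compiler to emit exactly one load/store and not to fold or hoist it.
 * This constrains the compiler only; cross-CPU ordering still needs
 * barriers or atomics, as the kernel comment above says. */
#include <stdio.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

static int data;
static int ready;

/* Publisher: store the payload, then the flag, in two separate statements so
 * the two marked stores stay distinct. */
static void producer(void)
{
	WRITE_ONCE(data, 42);
	WRITE_ONCE(ready, 1);
}

/* Consumer: poll the flag with READ_ONCE so the load is not hoisted out of
 * the loop, then read the payload. */
static int consumer(void)
{
	while (!READ_ONCE(ready))
		;
	return READ_ONCE(data);
}

int main(void)
{
	producer();
	printf("%d\n", consumer());
	return 0;
}
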
index 940c1b0756591e8e65792ce6dafbeb568a5aa15d..6b0c36a58fcbc38b67157b53efe2044bd848f137 100644 (file)
@@ -48,6 +48,7 @@ static inline int debug_locks_off(void)
 #define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
 #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define pr_warn pr_err
+#define pr_cont pr_err
 
 #define list_del_rcu list_del
 
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
new file mode 100644 (file)
index 0000000..13a5853
--- /dev/null
@@ -0,0 +1,7 @@
+#if defined(__aarch64__)
+#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
+#else
+#include <uapi/asm-generic/bpf_perf_event.h>
+#endif
index 282d7613fce8788bc466913d7fcacc960dd1c6de..496e59a2738ba99308f438e1f0509e66e17086cb 100644 (file)
@@ -630,9 +630,9 @@ struct kvm_s390_irq {
 
 struct kvm_s390_irq_state {
        __u64 buf;
-       __u32 flags;
+       __u32 flags;        /* will stay unused for compatibility reasons */
        __u32 len;
-       __u32 reserved[4];
+       __u32 reserved[4];  /* will stay unused for compatibility reasons */
 };
 
 /* for KVM_SET_GUEST_DEBUG */
index 217cf6f95c366037ccd2ff3cedb1d61de22c616a..a5684d0968b4fd087905e659c0ce80bd170434c2 100755 (executable)
@@ -478,7 +478,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter or fields_filter == "help":
+        if not fields_filter:
             return True
         return re.match(fields_filter, field) is not None
 
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self.get_available_fields()
+                       if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
     def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
             curses.nocbreak()
             curses.endwin()
 
-    def get_all_gnames(self):
+    @staticmethod
+    def get_all_gnames():
         """Returns a list of (pid, gname) tuples of all running guests"""
         res = []
         try:
@@ -963,7 +964,7 @@ class Tui(object):
             # perform a sanity check before calling the more expensive
             # function to possibly extract the guest name
             if ' -name ' in line[1]:
-                res.append((line[0], self.get_gname_from_pid(line[0])))
+                res.append((line[0], Tui.get_gname_from_pid(line[0])))
         child.stdout.close()
 
         return res
@@ -984,7 +985,8 @@ class Tui(object):
         except Exception:
             self.screen.addstr(row + 1, 2, 'Not available')
 
-    def get_pid_from_gname(self, gname):
+    @staticmethod
+    def get_pid_from_gname(gname):
         """Fuzzy function to convert guest name to QEMU process pid.
 
         Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
 
         """
         pids = []
-        for line in self.get_all_gnames():
+        for line in Tui.get_all_gnames():
             if gname == line[1]:
                 pids.append(int(line[0]))
 
@@ -1090,15 +1092,16 @@ class Tui(object):
             # sort by totals
             return (0, -stats[x][0])
         total = 0.
-        for val in stats.values():
-            total += val[0]
+        for key in stats.keys():
+            if key.find('(') is -1:
+                total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
             sortkey = sortCurAvg
         else:
             sortkey = sortTotal
+        tavg = 0
         for key in sorted(stats.keys(), key=sortkey):
-
-            if row >= self.screen.getmaxyx()[0]:
+            if row >= self.screen.getmaxyx()[0] - 1:
                 break
             values = stats[key]
             if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
+                if cur is not '' and key.find('(') is -1:
+                    tavg += cur
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
+        else:
+            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
+                               ('Total', total, tavg if tavg else ''),
+                               curses.A_BOLD)
         self.screen.refresh()
 
     def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
                 if char == 'x':
                     self.update_drilldown()
                     # prevents display of current values on next refresh
-                    self.stats.get()
+                    self.stats.get(self._display_guests)
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
         try:
             pids = Tui.get_pid_from_gname(val)
         except:
-            raise optparse.OptionValueError('Error while searching for guest '
-                                            '"{}", use "-p" to specify a pid '
-                                            'instead'.format(val))
+            sys.exit('Error while searching for guest "{}". Use "-p" to '
+                     'specify a pid instead?'.format(val))
         if len(pids) == 0:
-            raise optparse.OptionValueError('No guest by the name "{}" '
-                                            'found'.format(val))
+            sys.exit('Error: No guest by the name "{}" found'.format(val))
         if len(pids) > 1:
-            raise optparse.OptionValueError('Multiple processes found (pids: '
-                                            '{}) - use "-p" to specify a pid '
-                                            'instead'.format(" ".join(pids)))
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'.format(" ".join(pids)))
         parser.values.pid = pids[0]
 
     optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
                          help='restrict statistics to guest by name',
                          callback=cb_guest_to_pid,
                          )
-    (options, _) = optparser.parse_args(sys.argv)
+    options, unkn = optparser.parse_args(sys.argv)
+    if len(unkn) != 1:
+        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    try:
+        # verify that we were passed a valid regex up front
+        re.compile(options.fields)
+    except re.error:
+        sys.exit('Error: "' + options.fields + '" is not a valid regular '
+                 'expression')
+
     return options
 
 
@@ -1564,16 +1579,13 @@ def main():
 
     stats = Stats(options)
 
-    if options.fields == "help":
-        event_list = "\n"
-        s = stats.get()
-        for key in s.keys():
-            if key.find('(') != -1:
-                key = key[0:key.find('(')]
-            if event_list.find('\n' + key + '\n') == -1:
-                event_list += key + '\n'
-        sys.stdout.write(event_list)
-        return ""
+    if options.fields == 'help':
+        stats.fields_filter = None
+        event_list = []
+        for key in stats.get().keys():
+            event_list.append(key.split('(', 1)[0])
+        sys.stdout.write('  ' + '\n  '.join(sorted(set(event_list))) + '\n')
+        sys.exit(0)
 
     if options.log:
         log(stats)
index e5cf836be8a1848bb82f39cfa3c7c75dcc67b4fa..b5b3810c9e945d7f3a39568840fbc5b73f84983b 100644 (file)
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
 *s*::   set update interval
 
 *x*::  toggle reporting of stats for child trace events
+ ::     *Note*: The stats for the parents summarize the respective child trace
+                events
 
 Press any other key to refresh statistics immediately.
 
@@ -86,7 +88,7 @@ OPTIONS
 
 -f<fields>::
 --fields=<fields>::
-       fields to display (regex)
+       fields to display (regex), "-f help" for a list of available events
 
 -h::
 --help::
index 12e377184ee4ad0c55d00c3784f08b393764a2bc..e0b85930dd773e87417e2b4957b8af61221b04c0 100644 (file)
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
index c4d55919fac19e06afbb00a4124fbf1b334b4d46..e0b85930dd773e87417e2b4957b8af61221b04c0 100644 (file)
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
 EndTable
 
 Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
 7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
 83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
 88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
 EndTable
 
 GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
 EndTable
 
 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
index efd78b827b0514275f9388b2c64329c45c025248..3a5cb5a6e94ad8fc039dc3410a6724263057a970 100644 (file)
@@ -70,7 +70,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md);
 static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
 {
        struct perf_event_mmap_page *pc = mm->base;
-       u64 head = ACCESS_ONCE(pc->data_head);
+       u64 head = READ_ONCE(pc->data_head);
        rmb();
        return head;
 }
index 21a2d76b67dc453b13fde4ae760b275f04285d06..05fc4e2e7b3a06523cce6adde01fbdf03b8bdac0 100644 (file)
@@ -1,19 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-endif
-include $(srctree)/tools/scripts/Makefile.arch
-
-$(call detected_var,SRCARCH)
-
 LIBDIR := ../../../lib
 BPFDIR := $(LIBDIR)/bpf
 APIDIR := ../../../include/uapi
-ASMDIR:= ../../../arch/$(ARCH)/include/uapi
 GENDIR := ../../../../include/generated
 GENHDR := $(GENDIR)/autoconf.h
 
@@ -21,8 +10,8 @@ ifneq ($(wildcard $(GENHDR)),)
   GENFLAGS := -DHAVE_GENHDR
 endif
 
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(ASMDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup
index 69427531408dd22ef1887d473ab4bf6548e173b9..6761be18a91fccc2d4f8ad52b0f83fec293189ae 100644 (file)
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
                          info_len != sizeof(struct bpf_map_info) ||
                          strcmp((char *)map_infos[i].name, expected_map_name),
                          "get-map-info(fd)",
-                         "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+                         "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
                          err, errno,
                          map_infos[i].type, BPF_MAP_TYPE_ARRAY,
                          info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
                          *(int *)prog_infos[i].map_ids != map_infos[i].id ||
                          strcmp((char *)prog_infos[i].name, expected_prog_name),
                          "get-prog-info(fd)",
-                         "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+                         "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
                          err, errno, i,
                          prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
                          info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&prog_info, &prog_infos[i], info_len) ||
                      *(int *)prog_info.map_ids != saved_map_id,
                      "get-prog-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
                      err, errno, info_len, sizeof(struct bpf_prog_info),
                      memcmp(&prog_info, &prog_infos[i], info_len),
                      *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&map_info, &map_infos[i], info_len) ||
                      array_value != array_magic_value,
                      "check get-map-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
                      err, errno, info_len, sizeof(struct bpf_map_info),
                      memcmp(&map_info, &map_infos[i], info_len),
                      array_value, array_magic_value);
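
The format-string changes in this hunk swap %lu for %Zu when printing sizeof(...) values, since size_t is not unsigned long on every ABI the selftests build for; %Zu is the older glibc spelling of what standard C writes as %zu. A two-line illustration in plain C99, using a made-up struct rather than the real bpf_map_info:

/* size_t wants the z length modifier; %lu only happens to match on LP64. */
#include <stdio.h>

struct bpf_map_info_like { unsigned int type, key_size, value_size; };

int main(void)
{
	printf("info_len %zu\n", sizeof(struct bpf_map_info_like));
	return 0;
}
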
index 3c64f30cf63cc2b6adb532a3b1f3201533193f7f..b51017404c62d0dc8198afdf035016f6e5e2fd0b 100644 (file)
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
                        BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr_unpriv = "R1 subtraction from stack pointer",
-               .result_unpriv = REJECT,
-               .errstr = "R1 invalid mem access",
+               .errstr = "R1 subtraction from stack pointer",
                .result = REJECT,
        },
        {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
                },
                .errstr = "misaligned stack access",
                .result = REJECT,
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .result = ACCEPT,
-               .result_unpriv = REJECT,
-               .errstr_unpriv = "R1 pointer += pointer",
+               .result = REJECT,
+               .errstr = "R1 pointer += pointer",
        },
        {
                "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
-                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, len)),
                        BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
                        BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "invalid access to packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3, 11 },
-               .errstr_unpriv = "R0 pointer += pointer",
-               .errstr = "R0 invalid mem access 'inv'",
-               .result_unpriv = REJECT,
+               .errstr = "R0 pointer += pointer",
                .result = REJECT,
                .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
        },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 bitwise operator &= on pointer",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 bitwise operator &= on pointer",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 32-bit pointer arithmetic prohibited",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 pointer arithmetic with /= operator",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 pointer arithmetic with /= operator",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map_in_map = { 3 },
-               .errstr = "R1 type=inv expected=map_ptr",
-               .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+               .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
                .result = REJECT,
        },
        {
@@ -6116,6 +6102,30 @@ static struct bpf_test tests[] = {
                },
                .result = ACCEPT,
        },
+       {
+               "ld_abs: tests on r6 and skb data reload helper",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+                       BPF_MOV64_IMM(BPF_REG_6, 0),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_skb_vlan_push),
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 42),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
        {
                "ld_ind: check calling conv, r1",
                .insns = {
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
                        BPF_JMP_IMM(BPF_JA, 0, 0, -7),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr_unpriv = "R0 pointer comparison prohibited",
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
                .result_unpriv = REJECT,
        },
@@ -6741,6 +6750,462 @@ static struct bpf_test tests[] = {
                .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                .result = REJECT,
        },
+       {
+               "bounds check based on zero-extended MOV",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0x0000'0000'ffff'ffff */
+                       BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check based on sign-extended MOV. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 4294967295",
+               .result = REJECT
+       },
+       {
+               "bounds check based on sign-extended MOV. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xfff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 min value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test1",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value_size=8 off=1073741825",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test2",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value 1073741823",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check after truncation of non-boundary-crossing range",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       /* r2 = 0x10'0000'0000 */
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+                       /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        * difference to previous test: truncation via MOV32
+                        * instead of ALU32.
+                        */
+                       BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after wrapping 32-bit addition",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       /* r1 = 0x7fff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0xffff'fffe */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check after shift with oversized count operand",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_2, 32),
+                       BPF_MOV64_IMM(BPF_REG_1, 1),
+                       /* r1 = (u32)1 << (u32)32 = ? */
+                       BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x0000, 0xffff] */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 max value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check after right shift of maybe-negative number",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       /* r1 = [-0x01, 0xfe] */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+                       /* r1 = 0 or 0xff'ffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* r1 = 0 or 0xffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 2147483646",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset 1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test3",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset -1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test4",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_1, 1000000),
+                       BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 1000000000000",
+               .result = REJECT
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(1),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
        {
                "variable-offset ctx access",
                .insns = {
@@ -6782,6 +7247,71 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_LWT_IN,
        },
+       {
+               "indirect variable-offset stack access",
+               .insns = {
+                       /* Fill the top 8 bytes of the stack */
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       /* Get an unknown value */
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+                       /* Make it small and 4-byte aligned */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+                       /* add it to fp.  We now have either fp-4 or fp-8, but
+                        * we don't know which
+                        */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+                       /* dereference it indirectly */
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 5 },
+               .errstr = "variable stack read R2",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_LWT_IN,
+       },
+       {
+               "direct stack access with 32-bit wraparound. test1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 2147483647",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 1073741823",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test3",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer offset 1073741822",
+               .result = REJECT
+       },
        {
                "liveness pruning and write screening",
                .insns = {
@@ -7103,6 +7633,19 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
+       {
+               "pkt_end - pkt_start is allowed",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
        {
                "XDP pkt read, pkt_end mangling, bad access 1",
                .insns = {
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
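
The sign-extended MOV tests above hinge on one ISA detail: BPF_MOV64_IMM sign-extends its 32-bit immediate, so loading 0xffffffff produces all-ones, and a logical right shift by 32 then leaves 0xffffffff (the 4294967295 offset quoted in the errstr) rather than 0. A minimal userspace sketch of that arithmetic, illustrative only and not part of the selftest:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int32_t imm = (int32_t)0xffffffff;	/* the 32-bit immediate of the MOV */
	uint64_t r2 = (uint64_t)(int64_t)imm;	/* sign-extended to 64 bit, as BPF_MOV64_IMM does */

	r2 >>= 32;				/* logical shift, as BPF_ALU64 BPF_RSH */
	printf("%llu\n", (unsigned long long)r2);	/* prints 4294967295 */
	return 0;
}
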
index e57b4ac40e72e0502dff75ea1d80c543280428eb..7177bea1fdfa62a1aa4e424d4dab665d8a9b7aaf 100644 (file)
@@ -1,3 +1,4 @@
 CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
index 66e5ce5b91f008d5dffc7848d0e99d730ad589b5..0304ffb714f272a5646ff8134abdae92f491c5ad 100644 (file)
@@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
 static int finish_exec_test(void)
 {
        /*
-        * In a sensible world, this would be check_invalid_segment(0, 1);
-        * For better or for worse, though, the LDT is inherited across exec.
-        * We can probably change this safely, but for now we test it.
+        * Older kernel versions did inherit the LDT on exec() which is
+        * wrong because exec() starts from a clean state.
         */
-       check_valid_segment(0, 1,
-                           AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
-                           42, true);
+       check_invalid_segment(0, 1);
 
        return nerrs ? 1 : 0;
 }
index 8a1cd1616de41939243264a25d1ccc11c2d47e0e..c9c81614a66ad6245d831066b204b72b1af8817f 100644 (file)
@@ -50,14 +50,14 @@ static int parse_status(const char *value)
 
        while (*c != '\0') {
                int port, status, speed, devid;
-               unsigned long socket;
+               int sockfd;
                char lbusid[SYSFS_BUS_ID_SIZE];
                struct usbip_imported_device *idev;
                char hub[3];
 
-               ret = sscanf(c, "%2s  %d %d %d %x %lx %31s\n",
+               ret = sscanf(c, "%2s  %d %d %d %x %u %31s\n",
                                hub, &port, &status, &speed,
-                               &devid, &socket, lbusid);
+                               &devid, &sockfd, lbusid);
 
                if (ret < 5) {
                        dbg("sscanf failed: %d", ret);
@@ -66,7 +66,7 @@ static int parse_status(const char *value)
 
                dbg("hub %s port %d status %d speed %d devid %x",
                                hub, port, status, speed, devid);
-               dbg("socket %lx lbusid %s", socket, lbusid);
+               dbg("sockfd %u lbusid %s", sockfd, lbusid);
 
                /* if a device is connected, look at it */
                idev = &vhci_driver->idev[port];
@@ -106,7 +106,7 @@ static int parse_status(const char *value)
        return 0;
 }
 
-#define MAX_STATUS_NAME 16
+#define MAX_STATUS_NAME 18
 
 static int refresh_imported_device_list(void)
 {
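
The sockfd column is now parsed as a plain decimal (%u) rather than a hex kernel address. A standalone sketch of the new format string at work; the sample line and its values are invented for illustration:

#include <stdio.h>

int main(void)
{
	/* hypothetical status line: hub port status speed devid sockfd busid */
	const char *line = "hs 0000 004 000 00010002 3 1-2.3";
	char hub[3], lbusid[32];
	int port, status, speed;
	unsigned int devid, sockfd;
	int ret;

	ret = sscanf(line, "%2s  %d %d %d %x %u %31s\n",
		     hub, &port, &status, &speed, &devid, &sockfd, lbusid);
	printf("matched %d fields: port=%d sockfd=%u busid=%s\n",
	       ret, port, sockfd, lbusid);
	return 0;
}
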
index 38bb171acebade83314d9556baa7893c7d203911..e6e81305ef469559ffde4013535be10019c30da4 100644 (file)
 #define unlikely(x)    (__builtin_expect(!!(x), 0))
 #define likely(x)    (__builtin_expect(!!(x), 1))
 #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
+#define SIZE_MAX        (~(size_t)0)
+
 typedef pthread_spinlock_t  spinlock_t;
 
 typedef int gfp_t;
-static void *kmalloc(unsigned size, gfp_t gfp)
-{
-       return memalign(64, size);
-}
+#define __GFP_ZERO 0x1
 
-static void *kzalloc(unsigned size, gfp_t gfp)
+static void *kmalloc(unsigned size, gfp_t gfp)
 {
        void *p = memalign(64, size);
        if (!p)
                return p;
-       memset(p, 0, size);
 
+       if (gfp & __GFP_ZERO)
+               memset(p, 0, size);
        return p;
 }
 
+static inline void *kzalloc(unsigned size, gfp_t flags)
+{
+       return kmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+       if (size != 0 && n > SIZE_MAX / size)
+               return NULL;
+       return kmalloc(n * size, flags);
+}
+
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+       return kmalloc_array(n, size, flags | __GFP_ZERO);
+}
+
 static void kfree(void *p)
 {
        if (p)
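
The kmalloc_array() shim above guards the n * size multiplication the way the kernel helper does: if the product would exceed SIZE_MAX it returns NULL instead of allocating a truncated buffer, and kcalloc() is simply kmalloc_array() with __GFP_ZERO. A minimal sketch of that overflow check in isolation:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* the same guard the shim's kmalloc_array() applies before multiplying */
static int mul_would_overflow(size_t n, size_t size)
{
	return size != 0 && n > SIZE_MAX / size;
}

int main(void)
{
	printf("%d\n", mul_would_overflow(4, 16));		/* 0: 64 bytes is fine */
	printf("%d\n", mul_would_overflow(SIZE_MAX / 2, 3));	/* 1: product would overflow */
	return 0;
}
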
index 35b039864b778c1f03df563dce35ca59da1d79b4..0cf28aa6f21c3b392b8a90b4aa9e9087f6807b3e 100644 (file)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 # Sergey Senozhatsky, 2015
 # sergey.senozhatsky.work@gmail.com
index f9555b1e7f158f5203c1aaba47002424d3279203..cc29a814832837f5fb237dfdf74845a284e04367 100644 (file)
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
        struct arch_timer_context *vtimer;
+       u32 cnt_ctl;
 
-       if (!vcpu) {
-               pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-               return IRQ_NONE;
-       }
-       vtimer = vcpu_vtimer(vcpu);
+       /*
+        * We may see a timer interrupt after vcpu_put() has been called which
+        * sets the CPU's vcpu pointer to NULL, because even though the timer
+        * has been disabled in vtimer_save_state(), the hardware interrupt
+        * signal may not have been retired from the interrupt controller yet.
+        */
+       if (!vcpu)
+               return IRQ_HANDLED;
 
+       vtimer = vcpu_vtimer(vcpu);
        if (!vtimer->irq.level) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               if (kvm_timer_irq_can_fire(vtimer))
+               cnt_ctl = read_sysreg_el0(cntv_ctl);
+               cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+                          ARCH_TIMER_CTRL_IT_MASK;
+               if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
                        kvm_timer_update_irq(vcpu, true, vtimer);
        }
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
+       isb();
 
        vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
        return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
        struct arch_timer_kvm_info *info;
        int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
                return err;
        }
 
-       err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-       if (err) {
-               kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-               goto out_free_irq;
+       if (has_gic) {
+               err = irq_set_vcpu_affinity(host_vtimer_irq,
+                                           kvm_get_running_vcpus());
+               if (err) {
+                       kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+                       goto out_free_irq;
+               }
        }
 
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
        preempt_disable();
        timer->enabled = 1;
-       if (!irqchip_in_kernel(vcpu->kvm))
-               kvm_timer_vcpu_load_user(vcpu);
-       else
-               kvm_timer_vcpu_load_vgic(vcpu);
+       kvm_timer_vcpu_load(vcpu);
        preempt_enable();
 
        return 0;
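
The handler now decides purely from the saved CNTV_CTL value: the vtimer interrupt is injected only when the timer is enabled and its status bit is set while the mask bit is clear. A small userspace sketch of that predicate, restating the ARCH_TIMER_CTRL_* bit positions (ENABLE, IMASK and ISTATUS in the architectural CNTV_CTL layout) so it stands alone:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* CNTV_CTL bit layout, as in include/clocksource/arm_arch_timer.h */
#define ARCH_TIMER_CTRL_ENABLE	(1U << 0)
#define ARCH_TIMER_CTRL_IT_MASK	(1U << 1)
#define ARCH_TIMER_CTRL_IT_STAT	(1U << 2)

/* true only when the timer is enabled, its status bit is set and it is not masked */
static bool vtimer_should_fire(uint32_t cnt_ctl)
{
	cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
		   ARCH_TIMER_CTRL_IT_MASK;
	return cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT);
}

int main(void)
{
	printf("%d\n", vtimer_should_fire(ARCH_TIMER_CTRL_ENABLE |
					  ARCH_TIMER_CTRL_IT_STAT));	/* 1: fires */
	printf("%d\n", vtimer_should_fire(ARCH_TIMER_CTRL_ENABLE |
					  ARCH_TIMER_CTRL_IT_STAT |
					  ARCH_TIMER_CTRL_IT_MASK));	/* 0: masked */
	return 0;
}
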
index 6b60c98a6e2294c773eb20ea4794445a667415ea..2e43f9d42bd5db2a07438bb98f5e029c6246adb4 100644 (file)
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
        /*
         * Init HYP architected timer support
         */
-       err = kvm_timer_hyp_init();
+       err = kvm_timer_hyp_init(vgic_present);
        if (err)
                goto out;
 
index b6e715fd3c90af8c74408b72652f9974a3fb894d..dac7ceb1a677746cadb086a2cf8a07d8e560373c 100644 (file)
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
                }
 
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-                              data);
+                              &data);
                data = vcpu_data_host_to_guest(vcpu, data, len);
                vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
        }
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
                                               len);
 
-               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
                kvm_mmio_write_buf(data_buf, len, data);
 
                ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                       data_buf);
        } else {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-                              fault_ipa, 0);
+                              fault_ipa, NULL);
 
                ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                      data_buf);
index b36945d49986dd5c0f097f16837d72d81f655308..b4b69c2d10120237e12bc6524243071bf645f1d9 100644 (file)
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-       unsigned long addr;
-
        mutex_lock(&kvm_hyp_pgd_mutex);
 
        if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
        if (hyp_pgd) {
                unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-               for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-               for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+                               (uintptr_t)high_memory - PAGE_OFFSET);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+                               VMALLOC_END - VMALLOC_START);
 
                free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
                hyp_pgd = NULL;