RDMA/odp: Use the common interval tree library instead of generic
author Jason Gunthorpe <jgg@mellanox.com>
Mon, 19 Aug 2019 11:16:59 +0000 (14:16 +0300)
committer Jason Gunthorpe <jgg@mellanox.com>
Wed, 21 Aug 2019 16:34:09 +0000 (13:34 -0300)
ODP is working with userspace VAs in the interval tree, which always fit
into an unsigned long, so we can use the common code.

This comes at a cost of a 16 byte increase in ib_umem_odp struct size due
to storing the interval tree start/last in addition to the umem
addr/length. However, these values were previously recomputed from the
umem (via node_start()/node_last()) on each tree access and are
performance critical for the interval lookup, so this seems like a
worthwhile trade-off.

Removes 2k of .text from the kernel.

Link: https://lore.kernel.org/r/20190819111710.18440-2-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/Kconfig
drivers/infiniband/core/umem_odp.c
include/rdma/ib_umem_odp.h

index 85e103b147cc387cdf1a9a96fa3bf97341d206a7..b44b1c322ec82af12a3d2c02d180e426a5d430ae 100644 (file)
@@ -55,6 +55,7 @@ config INFINIBAND_ON_DEMAND_PAGING
        bool "InfiniBand on-demand paging support"
        depends on INFINIBAND_USER_MEM
        select MMU_NOTIFIER
+       select INTERVAL_TREE
        default y
        ---help---
          On demand paging support for the InfiniBand subsystem.
index c0e15db346808db3efc8222eddb14abc1212be05..6c17a7c3a565f9dd52af518e27b82fb0f0739785 100644 (file)
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
-#include <linux/interval_tree_generic.h>
+#include <linux/interval_tree.h>
 #include <linux/pagemap.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_umem_odp.h>
 
-/*
- * The ib_umem list keeps track of memory regions for which the HW
- * device request to receive notification when the related memory
- * mapping is changed.
- *
- * ib_umem_lock protects the list.
- */
-
-static u64 node_start(struct umem_odp_node *n)
-{
-       struct ib_umem_odp *umem_odp =
-                       container_of(n, struct ib_umem_odp, interval_tree);
-
-       return ib_umem_start(umem_odp);
-}
-
-/* Note that the representation of the intervals in the interval tree
- * considers the ending point as contained in the interval, while the
- * function ib_umem_end returns the first address which is not contained
- * in the umem.
- */
-static u64 node_last(struct umem_odp_node *n)
-{
-       struct ib_umem_odp *umem_odp =
-                       container_of(n, struct ib_umem_odp, interval_tree);
-
-       return ib_umem_end(umem_odp) - 1;
-}
-
-INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
-                    node_start, node_last, static, rbt_ib_umem)
-
 static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
 {
        mutex_lock(&umem_odp->umem_mutex);
@@ -205,9 +173,18 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
        struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
        down_write(&per_mm->umem_rwsem);
-       if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-               rbt_ib_umem_insert(&umem_odp->interval_tree,
-                                  &per_mm->umem_tree);
+       if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) {
+               /*
+                * Note that the representation of the intervals in the
+                * interval tree considers the ending point as contained in
+                * the interval, while the function ib_umem_end returns the
+                * first address which is not contained in the umem.
+                */
+               umem_odp->interval_tree.start = ib_umem_start(umem_odp);
+               umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
+               interval_tree_insert(&umem_odp->interval_tree,
+                                    &per_mm->umem_tree);
+       }
        up_write(&per_mm->umem_rwsem);
 }
 
@@ -217,8 +194,8 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 
        down_write(&per_mm->umem_rwsem);
        if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-               rbt_ib_umem_remove(&umem_odp->interval_tree,
-                                  &per_mm->umem_tree);
+               interval_tree_remove(&umem_odp->interval_tree,
+                                    &per_mm->umem_tree);
        complete_all(&umem_odp->notifier_completion);
 
        up_write(&per_mm->umem_rwsem);
@@ -761,18 +738,18 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
                                  void *cookie)
 {
        int ret_val = 0;
-       struct umem_odp_node *node, *next;
+       struct interval_tree_node *node, *next;
        struct ib_umem_odp *umem;
 
        if (unlikely(start == last))
                return ret_val;
 
-       for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+       for (node = interval_tree_iter_first(root, start, last - 1);
                        node; node = next) {
                /* TODO move the blockable decision up to the callback */
                if (!blockable)
                        return -EAGAIN;
-               next = rbt_ib_umem_iter_next(node, start, last - 1);
+               next = interval_tree_iter_next(node, start, last - 1);
                umem = container_of(node, struct ib_umem_odp, interval_tree);
                ret_val = cb(umem, start, last, cookie) || ret_val;
        }
@@ -780,16 +757,3 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
        return ret_val;
 }
 EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
-
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-                                      u64 addr, u64 length)
-{
-       struct umem_odp_node *node;
-
-       node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
-       if (node)
-               return container_of(node, struct ib_umem_odp, interval_tree);
-       return NULL;
-
-}
-EXPORT_SYMBOL(rbt_ib_umem_lookup);
index 479db5c98ff60f38fc9148fe3fd8f1632a4c4847..030d5cbad02cd6c8e06ab02ec067b37911c85bc1 100644 (file)
 #include <rdma/ib_verbs.h>
 #include <linux/interval_tree.h>
 
-struct umem_odp_node {
-       u64 __subtree_last;
-       struct rb_node rb;
-};
-
 struct ib_umem_odp {
        struct ib_umem umem;
        struct ib_ucontext_per_mm *per_mm;
@@ -72,7 +67,7 @@ struct ib_umem_odp {
        int npages;
 
        /* Tree tracking */
-       struct umem_odp_node    interval_tree;
+       struct interval_tree_node interval_tree;
 
        struct completion       notifier_completion;
        int                     dying;
@@ -163,8 +158,17 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
  * Find first region intersecting with address range.
  * Return NULL if not found
  */
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-                                      u64 addr, u64 length);
+static inline struct ib_umem_odp *
+rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
+{
+       struct interval_tree_node *node;
+
+       node = interval_tree_iter_first(root, addr, addr + length - 1);
+       if (!node)
+               return NULL;
+       return container_of(node, struct ib_umem_odp, interval_tree);
+
+}
 
 static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
                                             unsigned long mmu_seq)