ipc/mqueue: optimize msg_get()
authorDavidlohr Bueso <dave@stgolabs.net>
Tue, 14 May 2019 22:46:26 +0000 (15:46 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 15 May 2019 02:52:52 +0000 (19:52 -0700)
Our msg priorities became an rbtree as of d6629859b36d ("ipc/mqueue:
improve performance of send/recv").  However, consuming a msg in
msg_get() remains logarithmic (still being better than the case before
of course).  By applying well known techniques to cache pointers we can
have the node with the highest priority in O(1), which is specially nice
for the rt cases.  Furthermore, some callers can call msg_get() in a
loop.

A new msg_tree_erase() helper is also added to encapsulate the tree
removal and node_cache game.  Passes ltp mq testcases.

Link: http://lkml.kernel.org/r/20190321190216.1719-2-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
ipc/mqueue.c

index 8c6a04111fded6932717336fc9062cc161199009..216cad1ff0d0c475c6f6755156abc57d89573e0c 100644 (file)
@@ -76,6 +76,7 @@ struct mqueue_inode_info {
        wait_queue_head_t wait_q;
 
        struct rb_root msg_tree;
        wait_queue_head_t wait_q;
 
        struct rb_root msg_tree;
+       struct rb_node *msg_tree_rightmost;
        struct posix_msg_tree_node *node_cache;
        struct mq_attr attr;
 
        struct posix_msg_tree_node *node_cache;
        struct mq_attr attr;
 
@@ -131,6 +132,7 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
 {
        struct rb_node **p, *parent = NULL;
        struct posix_msg_tree_node *leaf;
 {
        struct rb_node **p, *parent = NULL;
        struct posix_msg_tree_node *leaf;
+       bool rightmost = true;
 
        p = &info->msg_tree.rb_node;
        while (*p) {
 
        p = &info->msg_tree.rb_node;
        while (*p) {
@@ -139,9 +141,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
 
                if (likely(leaf->priority == msg->m_type))
                        goto insert_msg;
 
                if (likely(leaf->priority == msg->m_type))
                        goto insert_msg;
-               else if (msg->m_type < leaf->priority)
+               else if (msg->m_type < leaf->priority) {
                        p = &(*p)->rb_left;
                        p = &(*p)->rb_left;
-               else
+                       rightmost = false;
+               } else
                        p = &(*p)->rb_right;
        }
        if (info->node_cache) {
                        p = &(*p)->rb_right;
        }
        if (info->node_cache) {
@@ -154,6 +157,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
                INIT_LIST_HEAD(&leaf->msg_list);
        }
        leaf->priority = msg->m_type;
                INIT_LIST_HEAD(&leaf->msg_list);
        }
        leaf->priority = msg->m_type;
+
+       if (rightmost)
+               info->msg_tree_rightmost = &leaf->rb_node;
+
        rb_link_node(&leaf->rb_node, parent, p);
        rb_insert_color(&leaf->rb_node, &info->msg_tree);
 insert_msg:
        rb_link_node(&leaf->rb_node, parent, p);
        rb_insert_color(&leaf->rb_node, &info->msg_tree);
 insert_msg:
@@ -163,23 +170,35 @@ insert_msg:
        return 0;
 }
 
        return 0;
 }
 
+static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
+                                 struct mqueue_inode_info *info)
+{
+       struct rb_node *node = &leaf->rb_node;
+
+       if (info->msg_tree_rightmost == node)
+               info->msg_tree_rightmost = rb_prev(node);
+
+       rb_erase(node, &info->msg_tree);
+       if (info->node_cache) {
+               kfree(leaf);
+       } else {
+               info->node_cache = leaf;
+       }
+}
+
 static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
 {
 static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
 {
-       struct rb_node **p, *parent = NULL;
+       struct rb_node *parent = NULL;
        struct posix_msg_tree_node *leaf;
        struct msg_msg *msg;
 
 try_again:
        struct posix_msg_tree_node *leaf;
        struct msg_msg *msg;
 
 try_again:
-       p = &info->msg_tree.rb_node;
-       while (*p) {
-               parent = *p;
-               /*
-                * During insert, low priorities go to the left and high to the
-                * right.  On receive, we want the highest priorities first, so
-                * walk all the way to the right.
-                */
-               p = &(*p)->rb_right;
-       }
+       /*
+        * During insert, low priorities go to the left and high to the
+        * right.  On receive, we want the highest priorities first, so
+        * walk all the way to the right.
+        */
+       parent = info->msg_tree_rightmost;
        if (!parent) {
                if (info->attr.mq_curmsgs) {
                        pr_warn_once("Inconsistency in POSIX message queue, "
        if (!parent) {
                if (info->attr.mq_curmsgs) {
                        pr_warn_once("Inconsistency in POSIX message queue, "
@@ -194,24 +213,14 @@ try_again:
                pr_warn_once("Inconsistency in POSIX message queue, "
                             "empty leaf node but we haven't implemented "
                             "lazy leaf delete!\n");
                pr_warn_once("Inconsistency in POSIX message queue, "
                             "empty leaf node but we haven't implemented "
                             "lazy leaf delete!\n");
-               rb_erase(&leaf->rb_node, &info->msg_tree);
-               if (info->node_cache) {
-                       kfree(leaf);
-               } else {
-                       info->node_cache = leaf;
-               }
+               msg_tree_erase(leaf, info);
                goto try_again;
        } else {
                msg = list_first_entry(&leaf->msg_list,
                                       struct msg_msg, m_list);
                list_del(&msg->m_list);
                if (list_empty(&leaf->msg_list)) {
                goto try_again;
        } else {
                msg = list_first_entry(&leaf->msg_list,
                                       struct msg_msg, m_list);
                list_del(&msg->m_list);
                if (list_empty(&leaf->msg_list)) {
-                       rb_erase(&leaf->rb_node, &info->msg_tree);
-                       if (info->node_cache) {
-                               kfree(leaf);
-                       } else {
-                               info->node_cache = leaf;
-                       }
+                       msg_tree_erase(leaf, info);
                }
        }
        info->attr.mq_curmsgs--;
                }
        }
        info->attr.mq_curmsgs--;
@@ -254,6 +263,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
                info->qsize = 0;
                info->user = NULL;      /* set when all is ok */
                info->msg_tree = RB_ROOT;
                info->qsize = 0;
                info->user = NULL;      /* set when all is ok */
                info->msg_tree = RB_ROOT;
+               info->msg_tree_rightmost = NULL;
                info->node_cache = NULL;
                memset(&info->attr, 0, sizeof(info->attr));
                info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
                info->node_cache = NULL;
                memset(&info->attr, 0, sizeof(info->attr));
                info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,