writeback: track number of inodes under writeback
author Jan Kara <jack@suse.cz>
Thu, 2 Sep 2021 21:53:04 +0000 (14:53 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 16:58:10 +0000 (09:58 -0700)
Patch series "writeback: Fix bandwidth estimates", v4.

Fix the estimate of writeback throughput when the device is not fully
busy doing writeback.  Michael Stapelberg has reported that such a
workload (e.g. one generated by linking) tends to push the estimated
throughput down to 0, and as a result writeback on the device is
practically stalled.

The first three patches fix the reported issue; the remaining two patches
are unrelated cleanups of problems I've noticed when reading the code.

This patch (of 4):

Track the number of inodes under writeback for each bdi_writeback
structure.  We will use this to decide whether a wb is doing any IO, so
that we can estimate its writeback throughput.  In principle we could use
the number of pages under writeback (the WB_WRITEBACK counter) for this;
however, normal percpu counter reads are too inaccurate for our purposes
and summing the counter is too expensive.

Link: https://lkml.kernel.org/r/20210713104519.16394-1-jack@suse.cz
Link: https://lkml.kernel.org/r/20210713104716.22868-1-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Michael Stapelberg <stapelberg+linux@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/fs-writeback.c
include/linux/backing-dev-defs.h
mm/backing-dev.c
mm/page-writeback.c

index 4c33705489825a8205e3e372415f4aa50ef41147..7439ecd44ac9e471e31a8758cac7e3bb0a906dc4 100644 (file)
@@ -406,6 +406,11 @@ static bool inode_do_switch_wbs(struct inode *inode,
                inc_wb_stat(new_wb, WB_WRITEBACK);
        }
 
+       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+               atomic_dec(&old_wb->writeback_inodes);
+               atomic_inc(&new_wb->writeback_inodes);
+       }
+
        wb_get(new_wb);
 
        /*
index 1d7edad9914fc5b8579d2ae6481287f8a0c67acf..06fb8e13f6bc510140774c11374e92ad0cba1ab5 100644 (file)
@@ -116,6 +116,7 @@ struct bdi_writeback {
        struct list_head b_dirty_time;  /* time stamps are dirty */
        spinlock_t list_lock;           /* protects the b_* lists */
 
+       atomic_t writeback_inodes;      /* number of inodes under writeback */
        struct percpu_counter stat[NR_WB_STAT_ITEMS];
 
        unsigned long congested;        /* WB_[a]sync_congested flags */
index f5561ea7d90ad62c28eb50e07f61e8c78df2d9d8..b4c707ddedb1b435f27e92d0a5961248f90f7237 100644 (file)
@@ -293,6 +293,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
        INIT_LIST_HEAD(&wb->b_dirty_time);
        spin_lock_init(&wb->list_lock);
 
+       atomic_set(&wb->writeback_inodes, 0);
        wb->bw_time_stamp = jiffies;
        wb->balanced_dirty_ratelimit = INIT_BW;
        wb->dirty_ratelimit = INIT_BW;
index 9f63548f247c33690c267d750fd031d6ee134ccf..e1aa1c9d8e362955ad1388c729f195acd46e7ca0 100644 (file)
@@ -2731,6 +2731,16 @@ int clear_page_dirty_for_io(struct page *page)
 }
 EXPORT_SYMBOL(clear_page_dirty_for_io);
 
+static void wb_inode_writeback_start(struct bdi_writeback *wb)
+{
+       atomic_inc(&wb->writeback_inodes);
+}
+
+static void wb_inode_writeback_end(struct bdi_writeback *wb)
+{
+       atomic_dec(&wb->writeback_inodes);
+}
+
 int test_clear_page_writeback(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
@@ -2752,6 +2762,9 @@ int test_clear_page_writeback(struct page *page)
 
                                dec_wb_stat(wb, WB_WRITEBACK);
                                __wb_writeout_inc(wb);
+                               if (!mapping_tagged(mapping,
+                                                   PAGECACHE_TAG_WRITEBACK))
+                                       wb_inode_writeback_end(wb);
                        }
                }
 
@@ -2794,8 +2807,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                                                   PAGECACHE_TAG_WRITEBACK);
 
                        xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
-                       if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
-                               inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+                       if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
+                               struct bdi_writeback *wb = inode_to_wb(inode);
+
+                               inc_wb_stat(wb, WB_WRITEBACK);
+                               if (!on_wblist)
+                                       wb_inode_writeback_start(wb);
+                       }
 
                        /*
                         * We can come through here when swapping anonymous