Btrfs: use REQ_CGROUP_PUNT for worker thread submitted bios
author Chris Mason <clm@fb.com>
Wed, 10 Jul 2019 19:28:17 +0000 (12:28 -0700)
committer David Sterba <dsterba@suse.com>
Mon, 18 Nov 2019 11:46:53 +0000 (12:46 +0100)
Async CRCs and compression submit IO through helper threads, which means
they suffer from IO priority inversions when cgroup IO controllers are in use.

This flags all of the writes submitted by btrfs helper threads as
REQ_CGROUP_PUNT.  submit_bio() will punt these to dedicated per-blkcg
work items to avoid the priority inversion.

For the compression code, we take a reference on the wbc's blkg css and
pass it down to the async workers.

For the async CRCs, the bio already has the correct css; we just need to
tell the block layer to use REQ_CGROUP_PUNT.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Chris Mason <clm@fb.com>
Modified-and-reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/compression.c
fs/btrfs/compression.h
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c

index ae7d93bfba97de8bb8451cf70f78e0fb1b29fa14..d70c464074209e42cc4db862385c048c1fea4c01 100644 (file)
@@ -311,7 +311,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                                 unsigned long compressed_len,
                                 struct page **compressed_pages,
                                 unsigned long nr_pages,
-                                unsigned int write_flags)
+                                unsigned int write_flags,
+                                struct cgroup_subsys_state *blkcg_css)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct bio *bio = NULL;
@@ -346,6 +347,11 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
        bio->bi_opf = REQ_OP_WRITE | write_flags;
        bio->bi_private = cb;
        bio->bi_end_io = end_compressed_bio_write;
+
+       if (blkcg_css) {
+               bio->bi_opf |= REQ_CGROUP_PUNT;
+               bio_associate_blkg_from_css(bio, blkcg_css);
+       }
        refcount_set(&cb->pending_bios, 1);
 
        /* create and submit bios for the compressed pages */
index 4cb8be9ff88b0e19ee77e0f21954e8badbc88d87..dd392278ab3f2324f0099f2afb03480b03323af3 100644 (file)
@@ -93,7 +93,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                                  unsigned long compressed_len,
                                  struct page **compressed_pages,
                                  unsigned long nr_pages,
-                                 unsigned int write_flags);
+                                 unsigned int write_flags,
+                                 struct cgroup_subsys_state *blkcg_css);
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags);
 
index 18f80b31f05f4d9432ca2a33a5512931f2d2bf31..0bc6e3e9f7c34d8426cefed9ef36b94082adbddd 100644 (file)
@@ -791,6 +791,12 @@ static void run_one_async_done(struct btrfs_work *work)
                return;
        }
 
+       /*
+        * All of the bios that pass through here are from async helpers.
+        * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
+        * This changes nothing when cgroups aren't in use.
+        */
+       async->bio->bi_opf |= REQ_CGROUP_PUNT;
        ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
        if (ret) {
                async->bio->bi_status = ret;
index 4e550a3e7e34a84563561aa2bfdec4d7502124e1..16b5d8e02e77427b94521636db823bb8706367be 100644 (file)
@@ -4250,6 +4250,9 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
                .nr_to_write    = nr_pages * 2,
                .range_start    = start,
                .range_end      = end + 1,
+               /* We're called from an async helper function */
+               .punt_to_cgroup = 1,
+               .no_cgroup_owner = 1,
        };
 
        while (start <= end) {
index 861105e11b377e480a8d7c87fa2e53e7e4c806ba..7ed0fe2dd5bafdc81f690edbfe3c9262c91618ec 100644 (file)
@@ -368,6 +368,7 @@ struct async_chunk {
        u64 end;
        unsigned int write_flags;
        struct list_head extents;
+       struct cgroup_subsys_state *blkcg_css;
        struct btrfs_work work;
        atomic_t *pending;
 };
@@ -880,7 +881,8 @@ retry:
                                    ins.objectid,
                                    ins.offset, async_extent->pages,
                                    async_extent->nr_pages,
-                                   async_chunk->write_flags)) {
+                                   async_chunk->write_flags,
+                                   async_chunk->blkcg_css)) {
                        struct page *p = async_extent->pages[0];
                        const u64 start = async_extent->start;
                        const u64 end = start + async_extent->ram_size - 1;
@@ -1198,6 +1200,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
        async_chunk = container_of(work, struct async_chunk, work);
        if (async_chunk->inode)
                btrfs_add_delayed_iput(async_chunk->inode);
+       if (async_chunk->blkcg_css)
+               css_put(async_chunk->blkcg_css);
        /*
         * Since the pointer to 'pending' is at the beginning of the array of
         * async_chunk's, freeing it ensures the whole array has been freed.
@@ -1206,12 +1210,15 @@ static noinline void async_cow_free(struct btrfs_work *work)
                kvfree(async_chunk->pending);
 }
 
-static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+static int cow_file_range_async(struct inode *inode,
+                               struct writeback_control *wbc,
+                               struct page *locked_page,
                                u64 start, u64 end, int *page_started,
                                unsigned long *nr_written,
                                unsigned int write_flags)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
        struct async_cow *ctx;
        struct async_chunk *async_chunk;
        unsigned long nr_pages;
@@ -1279,12 +1286,30 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
                 * to unlock it.
                 */
                if (locked_page) {
+                       /*
+                        * Depending on the compressibility, the pages might or
+                        * might not go through async.  We want all of them to
+                        * be accounted against wbc once.  Let's do it here
+                        * before the paths diverge.  wbc accounting is used
+                        * only for foreign writeback detection and doesn't
+                        * need full accuracy.  Just account the whole thing
+                        * against the first page.
+                        */
+                       wbc_account_cgroup_owner(wbc, locked_page,
+                                                cur_end - start);
                        async_chunk[i].locked_page = locked_page;
                        locked_page = NULL;
                } else {
                        async_chunk[i].locked_page = NULL;
                }
 
+               if (blkcg_css != blkcg_root_css) {
+                       css_get(blkcg_css);
+                       async_chunk[i].blkcg_css = blkcg_css;
+               } else {
+                       async_chunk[i].blkcg_css = NULL;
+               }
+
                btrfs_init_work(&async_chunk[i].work, async_cow_start,
                                async_cow_submit, async_cow_free);
 
@@ -1727,7 +1752,7 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
        } else {
                set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
                        &BTRFS_I(inode)->runtime_flags);
-               ret = cow_file_range_async(inode, locked_page, start, end,
+               ret = cow_file_range_async(inode, wbc, locked_page, start, end,
                                           page_started, nr_written,
                                           write_flags);
        }