f2fs: fix to avoid broken of dnode block list

author Chao Yu <yuchao0@huawei.com>

Thu, 2 Aug 2018 15:03:19 +0000 (23:03 +0800)

committer Jaegeuk Kim <jaegeuk@kernel.org>

Fri, 10 Aug 2018 23:19:05 +0000 (16:19 -0700)
author Chao Yu <yuchao0@huawei.com>
Thu, 2 Aug 2018 15:03:19 +0000 (23:03 +0800)
committer Jaegeuk Kim <jaegeuk@kernel.org>
Fri, 10 Aug 2018 23:19:05 +0000 (16:19 -0700)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c

index 334c6f90eaf1b76125fbaf1d4af3c573481d6e88..551c1d1984ecaaf8ab2eaf1c4b8dab63ee32759f 100644 (file)
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1161,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
         f2fs_unlock_all(sbi);
  }
  
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
  {
         DEFINE_WAIT(wait);
  
@@ -1397,7 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
  
         /* wait for previous submitted meta pages writeback */
-       wait_on_all_pages_writeback(sbi);
+       f2fs_wait_on_all_pages_writeback(sbi);
  
         /* flush all device cache */
         err = f2fs_flush_device_cache(sbi);
@@ -1406,7 +1406,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  
         /* barrier and flush checkpoint cp pack 2 page if it can */
         commit_checkpoint(sbi, ckpt, start_blk);
-       wait_on_all_pages_writeback(sbi);
+       f2fs_wait_on_all_pages_writeback(sbi);
  
         /*
          * invalidate intermediate page cache borrowed from meta inode
@@ -1418,6 +1418,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  
         f2fs_release_ino_entry(sbi, false);
  
+       f2fs_reset_fsync_node_info(sbi);
+
         clear_sbi_flag(sbi, SBI_IS_DIRTY);
         clear_sbi_flag(sbi, SBI_NEED_CP);
         __set_cp_next_pack(sbi);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c

index b7986b2e5d1d7a419cb24c484bf07eb3d92f35d8..363520ee099a1dbf4e8d01d127702d783a6ac014 100644 (file)
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -177,6 +177,8 @@ static void f2fs_write_end_io(struct bio *bio)
                                         page->index != nid_of_node(page));
  
                 dec_page_count(sbi, type);
+               if (f2fs_in_warm_node_list(sbi, page))
+                       f2fs_del_fsync_node_entry(sbi, page);
                 clear_cold_data(page);
                 end_page_writeback(page);
         }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index 75e81b1af2e848a016209fe319f1e439c86f5508..1647a13be7f93a592455ff5d3ac4c4d049032e1e 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -228,6 +228,12 @@ struct inode_entry {
         struct inode *inode;    /* vfs inode pointer */
  };
  
+struct fsync_node_entry {
+       struct list_head list;  /* list head */
+       struct page *page;      /* warm node page pointer */
+       unsigned int seq_id;    /* sequence id */
+};
+
  /* for the bitmap indicate blocks to be discarded */
  struct discard_entry {
         struct list_head list;  /* list head */
@@ -1156,6 +1162,11 @@ struct f2fs_sb_info {
  
         struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
  
+       spinlock_t fsync_node_lock;             /* for node entry lock */
+       struct list_head fsync_node_list;       /* node list head */
+       unsigned int fsync_seg_id;              /* sequence id */
+       unsigned int fsync_node_num;            /* number of node entries */
+
         /* for orphan inode, use 0'th array */
         unsigned int max_orphans;               /* max orphan inodes */
  
@@ -2827,6 +2838,10 @@ struct node_info;
  
  int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
  bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
  int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
  bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
  bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
@@ -2836,7 +2851,8 @@ pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
  int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
  int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
  int f2fs_truncate_xattr_node(struct inode *inode);
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+                                       unsigned int seq_id);
  int f2fs_remove_inode_page(struct inode *inode);
  struct page *f2fs_new_inode_page(struct inode *inode);
  struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -2845,7 +2861,8 @@ struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
  struct page *f2fs_get_node_page_ra(struct page *parent, int start);
  void f2fs_move_node_page(struct page *node_page, int gc_type);
  int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
-                       struct writeback_control *wbc, bool atomic);
+                       struct writeback_control *wbc, bool atomic,
+                       unsigned int *seq_id);
  int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
                         struct writeback_control *wbc,
                         bool do_balance, enum iostat_type io_type);
@@ -2962,6 +2979,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
  void f2fs_update_dirty_page(struct inode *inode, struct page *page);
  void f2fs_remove_dirty_inode(struct inode *inode);
  int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
  int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
  void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
  int __init f2fs_create_checkpoint_caches(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c

index c2c47f3248c4c67c53b2abe3509ceceb5cc410d6..a90b4f24aa280ec48bfe138645235c06190aa863 100644 (file)
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -213,6 +213,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
                 .nr_to_write = LONG_MAX,
                 .for_reclaim = 0,
         };
+       unsigned int seq_id = 0;
  
         if (unlikely(f2fs_readonly(inode->i_sb)))
                 return 0;
@@ -275,7 +276,7 @@ go_write:
         }
  sync_nodes:
         atomic_inc(&sbi->wb_sync_req[NODE]);
-       ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic);
+       ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
         atomic_dec(&sbi->wb_sync_req[NODE]);
         if (ret)
                 goto out;
@@ -301,7 +302,7 @@ sync_nodes:
          * given fsync mark.
          */
         if (!atomic) {
-               ret = f2fs_wait_on_node_pages_writeback(sbi, ino);
+               ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
                 if (ret)
                         goto out;
         }
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c

index 21ffb784764ce3bc702d26abc358645ac6ee15f4..81fb2f3edb5246d8950d795e1ffe0c899eb5099b 100644 (file)
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -28,6 +28,7 @@
  static struct kmem_cache *nat_entry_slab;
  static struct kmem_cache *free_nid_slab;
  static struct kmem_cache *nat_entry_set_slab;
+static struct kmem_cache *fsync_node_entry_slab;
  
  /*
   * Check whether the given nid is within node id range.
@@ -264,6 +265,72 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
                                                         start, nr);
  }
  
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
+{
+       return NODE_MAPPING(sbi) == page->mapping &&
+                       IS_DNODE(page) && is_cold_node(page);
+}
+
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+       spin_lock_init(&sbi->fsync_node_lock);
+       INIT_LIST_HEAD(&sbi->fsync_node_list);
+       sbi->fsync_seg_id = 0;
+       sbi->fsync_node_num = 0;
+}
+
+static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
+                                                       struct page *page)
+{
+       struct fsync_node_entry *fn;
+       unsigned long flags;
+       unsigned int seq_id;
+
+       fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+
+       get_page(page);
+       fn->page = page;
+       INIT_LIST_HEAD(&fn->list);
+
+       spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+       list_add_tail(&fn->list, &sbi->fsync_node_list);
+       fn->seq_id = sbi->fsync_seg_id++;
+       seq_id = fn->seq_id;
+       sbi->fsync_node_num++;
+       spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+       return seq_id;
+}
+
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
+{
+       struct fsync_node_entry *fn;
+       unsigned long flags;
+
+       spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+       list_for_each_entry(fn, &sbi->fsync_node_list, list) {
+               if (fn->page == page) {
+                       list_del(&fn->list);
+                       sbi->fsync_node_num--;
+                       spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+                       kmem_cache_free(fsync_node_entry_slab, fn);
+                       put_page(page);
+                       return;
+               }
+       }
+       spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+       f2fs_bug_on(sbi, 1);
+}
+
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+       sbi->fsync_seg_id = 0;
+       spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+}
+
  int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
  {
         struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1388,7 +1455,7 @@ continue_unlock:
  
  static int __write_node_page(struct page *page, bool atomic, bool *submitted,
                                 struct writeback_control *wbc, bool do_balance,
-                               enum iostat_type io_type)
+                               enum iostat_type io_type, unsigned int *seq_id)
  {
         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
         nid_t nid;
@@ -1405,6 +1472,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
                 .io_type = io_type,
                 .io_wbc = wbc,
         };
+       unsigned int seq;
  
         trace_f2fs_writepage(page, NODE);
  
@@ -1450,6 +1518,13 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
  
         set_page_writeback(page);
         ClearPageError(page);
+
+       if (f2fs_in_warm_node_list(sbi, page)) {
+               seq = f2fs_add_fsync_node_entry(sbi, page);
+               if (seq_id)
+                       *seq_id = seq;
+       }
+
         fio.old_blkaddr = ni.blk_addr;
         f2fs_do_write_node_page(nid, &fio);
         set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
@@ -1497,7 +1572,7 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
                         goto out_page;
  
                 if (__write_node_page(node_page, false, NULL,
-                                       &wbc, false, FS_GC_NODE_IO))
+                                       &wbc, false, FS_GC_NODE_IO, NULL))
                         unlock_page(node_page);
                 goto release_page;
         } else {
@@ -1514,11 +1589,13 @@ release_page:
  static int f2fs_write_node_page(struct page *page,
                                 struct writeback_control *wbc)
  {
-       return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
+       return __write_node_page(page, false, NULL, wbc, false,
+                                               FS_NODE_IO, NULL);
  }
  
  int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
-                       struct writeback_control *wbc, bool atomic)
+                       struct writeback_control *wbc, bool atomic,
+                       unsigned int *seq_id)
  {
         pgoff_t index;
         pgoff_t last_idx = ULONG_MAX;
@@ -1599,7 +1676,7 @@ continue_unlock:
                         ret = __write_node_page(page, atomic &&
                                                 page == last_page,
                                                 &submitted, wbc, true,
-                                               FS_NODE_IO);
+                                               FS_NODE_IO, seq_id);
                         if (ret) {
                                 unlock_page(page);
                                 f2fs_put_page(last_page, 0);
@@ -1716,7 +1793,7 @@ continue_unlock:
                         set_dentry_mark(page, 0);
  
                         ret = __write_node_page(page, false, &submitted,
-                                               wbc, do_balance, io_type);
+                                               wbc, do_balance, io_type, NULL);
                         if (ret)
                                 unlock_page(page);
                         else if (submitted)
@@ -1749,35 +1826,46 @@ out:
         return ret;
  }
  
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+                                               unsigned int seq_id)
  {
-       pgoff_t index = 0;
-       struct pagevec pvec;
+       struct fsync_node_entry *fn;
+       struct page *page;
+       struct list_head *head = &sbi->fsync_node_list;
+       unsigned long flags;
+       unsigned int cur_seq_id = 0;
         int ret2, ret = 0;
-       int nr_pages;
  
-       pagevec_init(&pvec);
+       while (seq_id && cur_seq_id < seq_id) {
+               spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+               if (list_empty(head)) {
+                       spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+                       break;
+               }
+               fn = list_first_entry(head, struct fsync_node_entry, list);
+               if (fn->seq_id > seq_id) {
+                       spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+                       break;
+               }
+               cur_seq_id = fn->seq_id;
+               page = fn->page;
+               get_page(page);
+               spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
  
-       while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
-                               PAGECACHE_TAG_WRITEBACK))) {
-               int i;
+               f2fs_wait_on_page_writeback(page, NODE, true);
+               if (TestClearPageError(page))
+                       ret = -EIO;
  
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
+               put_page(page);
  
-                       if (ino && ino_of_node(page) == ino) {
-                               f2fs_wait_on_page_writeback(page, NODE, true);
-                               if (TestClearPageError(page))
-                                       ret = -EIO;
-                       }
-               }
-               pagevec_release(&pvec);
-               cond_resched();
+               if (ret)
+                       break;
         }
  
         ret2 = filemap_check_errors(NODE_MAPPING(sbi));
         if (!ret)
                 ret = ret2;
+
         return ret;
  }
  
@@ -2992,8 +3080,15 @@ int __init f2fs_create_node_manager_caches(void)
                         sizeof(struct nat_entry_set));
         if (!nat_entry_set_slab)
                 goto destroy_free_nid;
+
+       fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+                       sizeof(struct fsync_node_entry));
+       if (!fsync_node_entry_slab)
+               goto destroy_nat_entry_set;
         return 0;
  
+destroy_nat_entry_set:
+       kmem_cache_destroy(nat_entry_set_slab);
  destroy_free_nid:
         kmem_cache_destroy(free_nid_slab);
  destroy_nat_entry:
@@ -3004,6 +3099,7 @@ fail:
  
  void f2fs_destroy_node_manager_caches(void)
  {
+       kmem_cache_destroy(fsync_node_entry_slab);
         kmem_cache_destroy(nat_entry_set_slab);
         kmem_cache_destroy(free_nid_slab);
         kmem_cache_destroy(nat_entry_slab);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c

index 879ff1b22357ca6302340e84fc802e34c60e0b4c..b0e2b017f390845fa9fca05b6e5ee7fc3f98f5de 100644 (file)
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1036,6 +1036,10 @@ static void f2fs_put_super(struct super_block *sb)
         /* our cp_error case, we can wait for any writeback page */
         f2fs_flush_merged_writes(sbi);
  
+       f2fs_wait_on_all_pages_writeback(sbi);
+
+       f2fs_bug_on(sbi, sbi->fsync_node_num);
+
         iput(sbi->node_inode);
         iput(sbi->meta_inode);
  
@@ -2911,6 +2915,8 @@ try_onemore:
  
         f2fs_init_ino_entry_info(sbi);
  
+       f2fs_init_fsync_node_info(sbi);
+
         /* setup f2fs internal modules */
         err = f2fs_build_segment_manager(sbi);
         if (err) {
author	Chao Yu <yuchao0@huawei.com>
	Thu, 2 Aug 2018 15:03:19 +0000 (23:03 +0800)
committer	Jaegeuk Kim <jaegeuk@kernel.org>
	Fri, 10 Aug 2018 23:19:05 +0000 (16:19 -0700)
fs/f2fs/checkpoint.c		patch \| blob \| history
fs/f2fs/data.c		patch \| blob \| history
fs/f2fs/f2fs.h		patch \| blob \| history
fs/f2fs/file.c		patch \| blob \| history
fs/f2fs/node.c		patch \| blob \| history
fs/f2fs/super.c		patch \| blob \| history