Merge branch 'quota_scaling' of git://git.kernel.org/pub/scm/linux/kernel/git/jack...
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Sep 2017 22:19:35 +0000 (15:19 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 7 Sep 2017 22:19:35 +0000 (15:19 -0700)
Pull quota scaling updates from Jan Kara:
 "This contains changes to make the quota subsystem more scalable.

  Reportedly it improves number of files created per second on ext4
  filesystem on fast storage by about a factor of 2x"

* 'quota_scaling' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (28 commits)
  quota: Add lock annotations to struct members
  quota: Reduce contention on dq_data_lock
  fs: Provide __inode_get_bytes()
  quota: Inline dquot_[re]claim_reserved_space() into callsite
  quota: Inline inode_{incr,decr}_space() into callsites
  quota: Inline functions into their callsites
  ext4: Disable dirty list tracking of dquots when journalling quotas
  quota: Allow disabling tracking of dirty dquots in a list
  quota: Remove dq_wait_unused from dquot
  quota: Move locking into clear_dquot_dirty()
  quota: Do not dirty bad dquots
  quota: Fix possible corruption of dqi_flags
  quota: Propagate ->quota_read errors from v2_read_file_info()
  quota: Fix error codes in v2_read_file_info()
  quota: Push dqio_sem down to ->read_file_info()
  quota: Push dqio_sem down to ->write_file_info()
  quota: Push dqio_sem down to ->get_next_id()
  quota: Push dqio_sem down to ->release_dqblk()
  quota: Remove locking for writing to the old quota format
  quota: Do not acquire dqio_sem for dquot overwrites in v2 format
  ...

1  2 
fs/ext4/super.c
fs/quota/dquot.c
fs/quota/quota_v2.c
include/linux/fs.h

diff --combined fs/ext4/super.c
index c9e7be58756b86dc128a9afc849036ea4eb83f17,67ce21224dab44fdbd82cae7f4e37bec43fc6bc3..93aece6891f296b4ffa7f571f1cb84625e900311
@@@ -2404,7 -2404,6 +2404,7 @@@ static void ext4_orphan_cleanup(struct 
        unsigned int s_flags = sb->s_flags;
        int ret, nr_orphans = 0, nr_truncates = 0;
  #ifdef CONFIG_QUOTA
 +      int quota_update = 0;
        int i;
  #endif
        if (!es->s_last_orphan) {
  #ifdef CONFIG_QUOTA
        /* Needed for iput() to work correctly and not trash data */
        sb->s_flags |= MS_ACTIVE;
 -      /* Turn on quotas so that they are updated correctly */
 +
 +      /*
 +       * Turn on quotas which were not enabled for read-only mounts if
 +       * filesystem has quota feature, so that they are updated correctly.
 +       */
 +      if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
 +              int ret = ext4_enable_quotas(sb);
 +
 +              if (!ret)
 +                      quota_update = 1;
 +              else
 +                      ext4_msg(sb, KERN_ERR,
 +                              "Cannot turn on quotas: error %d", ret);
 +      }
 +
 +      /* Turn on journaled quotas used for old style */
        for (i = 0; i < EXT4_MAXQUOTAS; i++) {
                if (EXT4_SB(sb)->s_qf_names[i]) {
                        int ret = ext4_quota_on_mount(sb, i);
 -                      if (ret < 0)
 +
 +                      if (!ret)
 +                              quota_update = 1;
 +                      else
                                ext4_msg(sb, KERN_ERR,
                                        "Cannot turn on journaled "
 -                                      "quota: error %d", ret);
 +                                      "quota: type %d: error %d", i, ret);
                }
        }
  #endif
                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
                       PLURAL(nr_truncates));
  #ifdef CONFIG_QUOTA
 -      /* Turn quotas off */
 -      for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 -              if (sb_dqopt(sb)->files[i])
 -                      dquot_quota_off(sb, i);
 +      /* Turn off quotas if they were enabled for orphan cleanup */
 +      if (quota_update) {
 +              for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 +                      if (sb_dqopt(sb)->files[i])
 +                              dquot_quota_off(sb, i);
 +              }
        }
  #endif
        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@@ -5215,7 -5194,7 +5215,7 @@@ static int ext4_statfs_project(struct s
        dquot = dqget(sb, qid);
        if (IS_ERR(dquot))
                return PTR_ERR(dquot);
-       spin_lock(&dq_data_lock);
+       spin_lock(&dquot->dq_dqb_lock);
  
        limit = (dquot->dq_dqb.dqb_bsoftlimit ?
                 dquot->dq_dqb.dqb_bsoftlimit :
                         (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
        }
  
-       spin_unlock(&dq_data_lock);
+       spin_unlock(&dquot->dq_dqb_lock);
        dqput(dquot);
        return 0;
  }
@@@ -5284,18 -5263,13 +5284,13 @@@ static int ext4_statfs(struct dentry *d
        return 0;
  }
  
- /* Helper function for writing quotas on sync - we need to start transaction
- -  * before quota file is locked for write. Otherwise there are possible deadlocks:
-  * Process 1                         Process 2
-  * ext4_create()                     quota_sync()
-  *   jbd2_journal_start()                  write_dquot()
-  *   dquot_initialize()                         down(dqio_mutex)
-  *     down(dqio_mutex)                    jbd2_journal_start()
-  *
-  */
  
  #ifdef CONFIG_QUOTA
  
+ /*
+  * Helper functions so that transaction is started before we acquire dqio_sem
+  * to keep correct lock ordering of transaction > dqio_sem
+  */
  static inline struct inode *dquot_to_inode(struct dquot *dquot)
  {
        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
@@@ -5430,6 -5404,13 +5425,13 @@@ static int ext4_quota_on(struct super_b
                        ext4_msg(sb, KERN_WARNING,
                                "Quota file not on filesystem root. "
                                "Journaled quota will not work");
+               sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
+       } else {
+               /*
+                * Clear the flag just in case mount options changed since
+                * last time.
+                */
+               sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
        }
  
        /*
@@@ -5526,16 -5507,13 +5528,16 @@@ static int ext4_enable_quotas(struct su
                test_opt(sb, PRJQUOTA),
        };
  
-       sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+       sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
        for (type = 0; type < EXT4_MAXQUOTAS; type++) {
                if (qf_inums[type]) {
                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
                                DQUOT_USAGE_ENABLED |
                                (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
                        if (err) {
 +                              for (type--; type >= 0; type--)
 +                                      dquot_quota_off(sb, type);
 +
                                ext4_warning(sb,
                                        "Failed to enable quota tracking "
                                        "(type=%d, err=%d). Please run "
diff --combined fs/quota/dquot.c
index 566e6ef99f077c76680cb005417ae995cb4e2878,d51797f850c5106d3d099ccbe2b012596097e4eb..8381db9db6d9bcc3acfa645bf9f73cbb5933c564
  #include <linux/uaccess.h>
  
  /*
-  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
-  * and quota formats.
-  * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
-  * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
-  * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
-  * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects
-  * modifications of quota state (on quotaon and quotaoff) and readers who care
-  * about latest values take it as well.
+  * There are five quota SMP locks:
+  * * dq_list_lock protects all lists with quotas and quota formats.
+  * * dquot->dq_dqb_lock protects data from dq_dqb
+  * * inode->i_lock protects inode->i_blocks, i_bytes and also guards
+  *   consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that
+  *   dquot_transfer() can stabilize amount it transfers
+  * * dq_data_lock protects mem_dqinfo structures and modifications of dquot
+  *   pointers in the inode
+  * * dq_state_lock protects modifications of quota state (on quotaon and
+  *   quotaoff) and readers who care about latest values take it as well.
   *
-  * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
+  * The spinlock ordering is hence:
+  *   dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock,
   *   dq_list_lock > dq_state_lock
   *
   * Note that some things (eg. sb pointer, type, id) don't change during
   * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
   * then drops all pointers to dquots from an inode.
   *
-  * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
-  * from inodes (dquot_alloc_space() and such don't check the dq_lock).
-  * Currently dquot is locked only when it is being read to memory (or space for
-  * it is being allocated) on the first dqget() and when it is being released on
-  * the last dqput(). The allocation and release oparations are serialized by
-  * the dq_lock and by checking the use count in dquot_release().  Write
-  * operations on dquots don't hold dq_lock as they copy data under dq_data_lock
-  * spinlock to internal buffers before writing.
+  * Each dquot has its dq_lock mutex.  Dquot is locked when it is being read to
+  * memory (or space for it is being allocated) on the first dqget(), when it is
+  * being written out, and when it is being released on the last dqput(). The
+  * allocation and release operations are serialized by the dq_lock and by
+  * checking the use count in dquot_release().
   *
   * Lock ordering (including related VFS locks) is the following:
-  *   s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
+  *   s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem
   */
  
  static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
@@@ -129,6 -129,8 +129,8 @@@ __cacheline_aligned_in_smp DEFINE_SPINL
  EXPORT_SYMBOL(dq_data_lock);
  DEFINE_STATIC_SRCU(dquot_srcu);
  
+ static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq);
  void __quota_error(struct super_block *sb, const char *func,
                   const char *fmt, ...)
  {
@@@ -247,6 -249,7 +249,7 @@@ struct dqstats dqstats
  EXPORT_SYMBOL(dqstats);
  
  static qsize_t inode_get_rsv_space(struct inode *inode);
+ static qsize_t __inode_get_rsv_space(struct inode *inode);
  static int __dquot_initialize(struct inode *inode, int type);
  
  static inline unsigned int
@@@ -342,6 -345,12 +345,12 @@@ int dquot_mark_dquot_dirty(struct dquo
  {
        int ret = 1;
  
+       if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+               return 0;
+       if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
+               return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags);
        /* If quota is dirty already, we don't have to acquire dq_list_lock */
        if (test_bit(DQ_MOD_B, &dquot->dq_flags))
                return 1;
@@@ -381,18 -390,26 +390,26 @@@ static inline void dqput_all(struct dqu
                dqput(dquot[cnt]);
  }
  
- /* This function needs dq_list_lock */
  static inline int clear_dquot_dirty(struct dquot *dquot)
  {
-       if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags))
+       if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
+               return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags);
+       spin_lock(&dq_list_lock);
+       if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) {
+               spin_unlock(&dq_list_lock);
                return 0;
+       }
        list_del_init(&dquot->dq_dirty);
+       spin_unlock(&dq_list_lock);
        return 1;
  }
  
  void mark_info_dirty(struct super_block *sb, int type)
  {
-       set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags);
+       spin_lock(&dq_data_lock);
+       sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY;
+       spin_unlock(&dq_data_lock);
  }
  EXPORT_SYMBOL(mark_info_dirty);
  
@@@ -406,7 -423,6 +423,6 @@@ int dquot_acquire(struct dquot *dquot
        struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
  
        mutex_lock(&dquot->dq_lock);
-       mutex_lock(&dqopt->dqio_mutex);
        if (!test_bit(DQ_READ_B, &dquot->dq_flags))
                ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
        if (ret < 0)
        smp_mb__before_atomic();
        set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
  out_iolock:
-       mutex_unlock(&dqopt->dqio_mutex);
        mutex_unlock(&dquot->dq_lock);
        return ret;
  }
@@@ -450,21 -465,17 +465,17 @@@ int dquot_commit(struct dquot *dquot
        int ret = 0;
        struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
  
-       mutex_lock(&dqopt->dqio_mutex);
-       spin_lock(&dq_list_lock);
-       if (!clear_dquot_dirty(dquot)) {
-               spin_unlock(&dq_list_lock);
-               goto out_sem;
-       }
-       spin_unlock(&dq_list_lock);
+       mutex_lock(&dquot->dq_lock);
+       if (!clear_dquot_dirty(dquot))
+               goto out_lock;
        /* Inactive dquot can be only if there was error during read/init
         * => we have better not writing it */
        if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
                ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
        else
                ret = -EIO;
- out_sem:
-       mutex_unlock(&dqopt->dqio_mutex);
+ out_lock:
+       mutex_unlock(&dquot->dq_lock);
        return ret;
  }
  EXPORT_SYMBOL(dquot_commit);
@@@ -481,7 -492,6 +492,6 @@@ int dquot_release(struct dquot *dquot
        /* Check whether we are not racing with some other dqget() */
        if (atomic_read(&dquot->dq_count) > 1)
                goto out_dqlock;
-       mutex_lock(&dqopt->dqio_mutex);
        if (dqopt->ops[dquot->dq_id.type]->release_dqblk) {
                ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot);
                /* Write the info */
                        ret = ret2;
        }
        clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
-       mutex_unlock(&dqopt->dqio_mutex);
  out_dqlock:
        mutex_unlock(&dquot->dq_lock);
        return ret;
@@@ -530,22 -539,18 +539,18 @@@ restart
                        continue;
                /* Wait for dquot users */
                if (atomic_read(&dquot->dq_count)) {
-                       DEFINE_WAIT(wait);
                        dqgrab(dquot);
-                       prepare_to_wait(&dquot->dq_wait_unused, &wait,
-                                       TASK_UNINTERRUPTIBLE);
                        spin_unlock(&dq_list_lock);
-                       /* Once dqput() wakes us up, we know it's time to free
+                       /*
+                        * Once dqput() wakes us up, we know it's time to free
                         * the dquot.
                         * IMPORTANT: we rely on the fact that there is always
                         * at most one process waiting for dquot to free.
                         * Otherwise dq_count would be > 1 and we would never
                         * wake up.
                         */
-                       if (atomic_read(&dquot->dq_count) > 1)
-                               schedule();
-                       finish_wait(&dquot->dq_wait_unused, &wait);
+                       wait_event(dquot_ref_wq,
+                                  atomic_read(&dquot->dq_count) == 1);
                        dqput(dquot);
                        /* At this moment dquot() need not exist (it could be
                         * reclaimed by prune_dqcache(). Hence we must
@@@ -629,11 -634,9 +634,9 @@@ int dquot_writeback_dquots(struct super
                while (!list_empty(dirty)) {
                        dquot = list_first_entry(dirty, struct dquot,
                                                 dq_dirty);
-                       /* Dirty and inactive can be only bad dquot... */
-                       if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
-                               clear_dquot_dirty(dquot);
-                               continue;
-                       }
+                       WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
                        /* Now we have active dquot from which someone is
                         * holding reference so we can safely just increase
                         * use count */
@@@ -759,12 -762,12 +762,12 @@@ we_slept
                /* Releasing dquot during quotaoff phase? */
                if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) &&
                    atomic_read(&dquot->dq_count) == 1)
-                       wake_up(&dquot->dq_wait_unused);
+                       wake_up(&dquot_ref_wq);
                spin_unlock(&dq_list_lock);
                return;
        }
        /* Need to release dquot? */
-       if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
+       if (dquot_dirty(dquot)) {
                spin_unlock(&dq_list_lock);
                /* Commit dquot before releasing */
                ret = dquot->dq_sb->dq_op->write_dquot(dquot);
                         * We clear dirty bit anyway, so that we avoid
                         * infinite loop here
                         */
-                       spin_lock(&dq_list_lock);
                        clear_dquot_dirty(dquot);
-                       spin_unlock(&dq_list_lock);
                }
                goto we_slept;
        }
-       /* Clear flag in case dquot was inactive (something bad happened) */
-       clear_dquot_dirty(dquot);
        if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
                spin_unlock(&dq_list_lock);
                dquot->dq_sb->dq_op->release_dquot(dquot);
@@@ -818,10 -817,10 +817,10 @@@ static struct dquot *get_empty_dquot(st
        INIT_LIST_HEAD(&dquot->dq_inuse);
        INIT_HLIST_NODE(&dquot->dq_hash);
        INIT_LIST_HEAD(&dquot->dq_dirty);
-       init_waitqueue_head(&dquot->dq_wait_unused);
        dquot->dq_sb = sb;
        dquot->dq_id = make_kqid_invalid(type);
        atomic_set(&dquot->dq_count, 1);
+       spin_lock_init(&dquot->dq_dqb_lock);
  
        return dquot;
  }
@@@ -1079,42 -1078,6 +1078,6 @@@ static void drop_dquot_ref(struct super
        }
  }
  
- static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
- {
-       dquot->dq_dqb.dqb_curinodes += number;
- }
- static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
- {
-       dquot->dq_dqb.dqb_curspace += number;
- }
- static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
- {
-       dquot->dq_dqb.dqb_rsvspace += number;
- }
- /*
-  * Claim reserved quota space
-  */
- static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
- {
-       if (dquot->dq_dqb.dqb_rsvspace < number) {
-               WARN_ON_ONCE(1);
-               number = dquot->dq_dqb.dqb_rsvspace;
-       }
-       dquot->dq_dqb.dqb_curspace += number;
-       dquot->dq_dqb.dqb_rsvspace -= number;
- }
- static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number)
- {
-       if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
-               number = dquot->dq_dqb.dqb_curspace;
-       dquot->dq_dqb.dqb_rsvspace += number;
-       dquot->dq_dqb.dqb_curspace -= number;
- }
  static inline
  void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
  {
                WARN_ON_ONCE(1);
                dquot->dq_dqb.dqb_rsvspace = 0;
        }
 +      if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <=
 +          dquot->dq_dqb.dqb_bsoftlimit)
 +              dquot->dq_dqb.dqb_btime = (time64_t) 0;
 +      clear_bit(DQ_BLKS_B, &dquot->dq_flags);
  }
  
  static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
@@@ -1149,8 -1108,7 +1112,8 @@@ static void dquot_decr_space(struct dqu
                dquot->dq_dqb.dqb_curspace -= number;
        else
                dquot->dq_dqb.dqb_curspace = 0;
 -      if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
 +      if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <=
 +          dquot->dq_dqb.dqb_bsoftlimit)
                dquot->dq_dqb.dqb_btime = (time64_t) 0;
        clear_bit(DQ_BLKS_B, &dquot->dq_flags);
  }
@@@ -1278,21 -1236,24 +1241,24 @@@ static int ignore_hardlimit(struct dquo
                !(info->dqi_flags & DQF_ROOT_SQUASH));
  }
  
- /* needs dq_data_lock */
- static int check_idq(struct dquot *dquot, qsize_t inodes,
-                    struct dquot_warn *warn)
+ static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes,
+                           struct dquot_warn *warn)
  {
-       qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
+       qsize_t newinodes;
+       int ret = 0;
  
+       spin_lock(&dquot->dq_dqb_lock);
+       newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
        if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) ||
            test_bit(DQ_FAKE_B, &dquot->dq_flags))
-               return 0;
+               goto add;
  
        if (dquot->dq_dqb.dqb_ihardlimit &&
            newinodes > dquot->dq_dqb.dqb_ihardlimit &&
              !ignore_hardlimit(dquot)) {
                prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN);
-               return -EDQUOT;
+               ret = -EDQUOT;
+               goto out;
        }
  
        if (dquot->dq_dqb.dqb_isoftlimit &&
            ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime &&
              !ignore_hardlimit(dquot)) {
                prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN);
-               return -EDQUOT;
+               ret = -EDQUOT;
+               goto out;
        }
  
        if (dquot->dq_dqb.dqb_isoftlimit &&
                dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() +
                    sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace;
        }
+ add:
+       dquot->dq_dqb.dqb_curinodes = newinodes;
  
-       return 0;
+ out:
+       spin_unlock(&dquot->dq_dqb_lock);
+       return ret;
  }
  
- /* needs dq_data_lock */
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
-                    struct dquot_warn *warn)
+ static int dquot_add_space(struct dquot *dquot, qsize_t space,
                         qsize_t rsv_space, unsigned int flags,
+                          struct dquot_warn *warn)
  {
        qsize_t tspace;
        struct super_block *sb = dquot->dq_sb;
+       int ret = 0;
  
+       spin_lock(&dquot->dq_dqb_lock);
        if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
            test_bit(DQ_FAKE_B, &dquot->dq_flags))
-               return 0;
+               goto add;
  
        tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
-               + space;
+               + space + rsv_space;
+       if (flags & DQUOT_SPACE_NOFAIL)
+               goto add;
  
        if (dquot->dq_dqb.dqb_bhardlimit &&
            tspace > dquot->dq_dqb.dqb_bhardlimit &&
              !ignore_hardlimit(dquot)) {
-               if (!prealloc)
+               if (flags & DQUOT_SPACE_WARN)
                        prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
-               return -EDQUOT;
+               ret = -EDQUOT;
+               goto out;
        }
  
        if (dquot->dq_dqb.dqb_bsoftlimit &&
            dquot->dq_dqb.dqb_btime &&
            ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime &&
              !ignore_hardlimit(dquot)) {
-               if (!prealloc)
+               if (flags & DQUOT_SPACE_WARN)
                        prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
-               return -EDQUOT;
+               ret = -EDQUOT;
+               goto out;
        }
  
        if (dquot->dq_dqb.dqb_bsoftlimit &&
            tspace > dquot->dq_dqb.dqb_bsoftlimit &&
            dquot->dq_dqb.dqb_btime == 0) {
-               if (!prealloc) {
+               if (flags & DQUOT_SPACE_WARN) {
                        prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
                        dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() +
                            sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace;
-               }
-               else
+               } else {
                        /*
                         * We don't allow preallocation to exceed softlimit so exceeding will
                         * be always printed
                         */
-                       return -EDQUOT;
+                       ret = -EDQUOT;
+                       goto out;
+               }
        }
-       return 0;
+ add:
+       dquot->dq_dqb.dqb_rsvspace += rsv_space;
+       dquot->dq_dqb.dqb_curspace += space;
+ out:
+       spin_unlock(&dquot->dq_dqb_lock);
+       return ret;
  }
  
  static int info_idq_free(struct dquot *dquot, qsize_t inodes)
  
  static int info_bdq_free(struct dquot *dquot, qsize_t space)
  {
 +      qsize_t tspace;
 +
 +      tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace;
 +
        if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
 -          dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
 +          tspace <= dquot->dq_dqb.dqb_bsoftlimit)
                return QUOTA_NL_NOWARN;
  
 -      if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
 +      if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
                return QUOTA_NL_BSOFTBELOW;
 -      if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit &&
 -          dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit)
 +      if (tspace >= dquot->dq_dqb.dqb_bhardlimit &&
 +          tspace - space < dquot->dq_dqb.dqb_bhardlimit)
                return QUOTA_NL_BHARDBELOW;
        return QUOTA_NL_NOWARN;
  }
@@@ -1502,8 -1476,15 +1485,15 @@@ static int __dquot_initialize(struct in
                         * did a write before quota was turned on
                         */
                        rsv = inode_get_rsv_space(inode);
-                       if (unlikely(rsv))
-                               dquot_resv_space(dquots[cnt], rsv);
+                       if (unlikely(rsv)) {
+                               spin_lock(&inode->i_lock);
+                               /* Get reservation again under proper lock */
+                               rsv = __inode_get_rsv_space(inode);
+                               spin_lock(&dquots[cnt]->dq_dqb_lock);
+                               dquots[cnt]->dq_dqb.dqb_rsvspace += rsv;
+                               spin_unlock(&dquots[cnt]->dq_dqb_lock);
+                               spin_unlock(&inode->i_lock);
+                       }
                }
        }
  out_lock:
@@@ -1598,39 -1579,12 +1588,12 @@@ static qsize_t *inode_reserved_space(st
        return inode->i_sb->dq_op->get_reserved_space(inode);
  }
  
- void inode_add_rsv_space(struct inode *inode, qsize_t number)
- {
-       spin_lock(&inode->i_lock);
-       *inode_reserved_space(inode) += number;
-       spin_unlock(&inode->i_lock);
- }
- EXPORT_SYMBOL(inode_add_rsv_space);
- void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+ static qsize_t __inode_get_rsv_space(struct inode *inode)
  {
-       spin_lock(&inode->i_lock);
-       *inode_reserved_space(inode) -= number;
-       __inode_add_bytes(inode, number);
-       spin_unlock(&inode->i_lock);
- }
- EXPORT_SYMBOL(inode_claim_rsv_space);
- void inode_reclaim_rsv_space(struct inode *inode, qsize_t number)
- {
-       spin_lock(&inode->i_lock);
-       *inode_reserved_space(inode) += number;
-       __inode_sub_bytes(inode, number);
-       spin_unlock(&inode->i_lock);
- }
- EXPORT_SYMBOL(inode_reclaim_rsv_space);
- void inode_sub_rsv_space(struct inode *inode, qsize_t number)
- {
-       spin_lock(&inode->i_lock);
-       *inode_reserved_space(inode) -= number;
-       spin_unlock(&inode->i_lock);
+       if (!inode->i_sb->dq_op->get_reserved_space)
+               return 0;
+       return *inode_reserved_space(inode);
  }
- EXPORT_SYMBOL(inode_sub_rsv_space);
  
  static qsize_t inode_get_rsv_space(struct inode *inode)
  {
        if (!inode->i_sb->dq_op->get_reserved_space)
                return 0;
        spin_lock(&inode->i_lock);
-       ret = *inode_reserved_space(inode);
+       ret = __inode_get_rsv_space(inode);
        spin_unlock(&inode->i_lock);
        return ret;
  }
  
- static void inode_incr_space(struct inode *inode, qsize_t number,
-                               int reserve)
- {
-       if (reserve)
-               inode_add_rsv_space(inode, number);
-       else
-               inode_add_bytes(inode, number);
- }
- static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
- {
-       if (reserve)
-               inode_sub_rsv_space(inode, number);
-       else
-               inode_sub_bytes(inode, number);
- }
  /*
   * This functions updates i_blocks+i_bytes fields and quota information
   * (together with appropriate checks).
@@@ -1682,7 -1619,13 +1628,13 @@@ int __dquot_alloc_space(struct inode *i
        struct dquot **dquots;
  
        if (!dquot_active(inode)) {
-               inode_incr_space(inode, number, reserve);
+               if (reserve) {
+                       spin_lock(&inode->i_lock);
+                       *inode_reserved_space(inode) += number;
+                       spin_unlock(&inode->i_lock);
+               } else {
+                       inode_add_bytes(inode, number);
+               }
                goto out;
        }
  
  
        dquots = i_dquot(inode);
        index = srcu_read_lock(&dquot_srcu);
-       spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!dquots[cnt])
                        continue;
-               ret = check_bdq(dquots[cnt], number,
-                               !(flags & DQUOT_SPACE_WARN), &warn[cnt]);
-               if (ret && !(flags & DQUOT_SPACE_NOFAIL)) {
-                       spin_unlock(&dq_data_lock);
+               if (flags & DQUOT_SPACE_RESERVE) {
+                       ret = dquot_add_space(dquots[cnt], 0, number, flags,
+                                             &warn[cnt]);
+               } else {
+                       ret = dquot_add_space(dquots[cnt], number, 0, flags,
+                                             &warn[cnt]);
+               }
+               if (ret) {
+                       /* Back out changes we already did */
+                       for (cnt--; cnt >= 0; cnt--) {
+                               if (!dquots[cnt])
+                                       continue;
+                               spin_lock(&dquots[cnt]->dq_dqb_lock);
+                               if (flags & DQUOT_SPACE_RESERVE) {
+                                       dquots[cnt]->dq_dqb.dqb_rsvspace -=
+                                                                       number;
+                               } else {
+                                       dquots[cnt]->dq_dqb.dqb_curspace -=
+                                                                       number;
+                               }
+                               spin_unlock(&dquots[cnt]->dq_dqb_lock);
+                       }
+                       spin_unlock(&inode->i_lock);
                        goto out_flush_warn;
                }
        }
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               if (!dquots[cnt])
-                       continue;
-               if (reserve)
-                       dquot_resv_space(dquots[cnt], number);
-               else
-                       dquot_incr_space(dquots[cnt], number);
-       }
-       inode_incr_space(inode, number, reserve);
-       spin_unlock(&dq_data_lock);
+       if (reserve)
+               *inode_reserved_space(inode) += number;
+       else
+               __inode_add_bytes(inode, number);
+       spin_unlock(&inode->i_lock);
  
        if (reserve)
                goto out_flush_warn;
@@@ -1740,23 -1697,26 +1706,26 @@@ int dquot_alloc_inode(struct inode *ino
  
        dquots = i_dquot(inode);
        index = srcu_read_lock(&dquot_srcu);
-       spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!dquots[cnt])
                        continue;
-               ret = check_idq(dquots[cnt], 1, &warn[cnt]);
-               if (ret)
+               ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]);
+               if (ret) {
+                       for (cnt--; cnt >= 0; cnt--) {
+                               if (!dquots[cnt])
+                                       continue;
+                               /* Back out changes we already did */
+                               spin_lock(&dquots[cnt]->dq_dqb_lock);
+                               dquots[cnt]->dq_dqb.dqb_curinodes--;
+                               spin_unlock(&dquots[cnt]->dq_dqb_lock);
+                       }
                        goto warn_put_all;
-       }
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               if (!dquots[cnt])
-                       continue;
-               dquot_incr_inodes(dquots[cnt], 1);
+               }
        }
  
  warn_put_all:
-       spin_unlock(&dq_data_lock);
+       spin_unlock(&inode->i_lock);
        if (ret == 0)
                mark_all_dquot_dirty(dquots);
        srcu_read_unlock(&dquot_srcu, index);
@@@ -1774,21 -1734,33 +1743,33 @@@ int dquot_claim_space_nodirty(struct in
        int cnt, index;
  
        if (!dquot_active(inode)) {
-               inode_claim_rsv_space(inode, number);
+               spin_lock(&inode->i_lock);
+               *inode_reserved_space(inode) -= number;
+               __inode_add_bytes(inode, number);
+               spin_unlock(&inode->i_lock);
                return 0;
        }
  
        dquots = i_dquot(inode);
        index = srcu_read_lock(&dquot_srcu);
-       spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        /* Claim reserved quotas to allocated quotas */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               if (dquots[cnt])
-                       dquot_claim_reserved_space(dquots[cnt], number);
+               if (dquots[cnt]) {
+                       struct dquot *dquot = dquots[cnt];
+                       spin_lock(&dquot->dq_dqb_lock);
+                       if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number))
+                               number = dquot->dq_dqb.dqb_rsvspace;
+                       dquot->dq_dqb.dqb_curspace += number;
+                       dquot->dq_dqb.dqb_rsvspace -= number;
+                       spin_unlock(&dquot->dq_dqb_lock);
+               }
        }
        /* Update inode bytes */
-       inode_claim_rsv_space(inode, number);
-       spin_unlock(&dq_data_lock);
+       *inode_reserved_space(inode) -= number;
+       __inode_add_bytes(inode, number);
+       spin_unlock(&inode->i_lock);
        mark_all_dquot_dirty(dquots);
        srcu_read_unlock(&dquot_srcu, index);
        return 0;
@@@ -1804,21 -1776,33 +1785,33 @@@ void dquot_reclaim_space_nodirty(struc
        int cnt, index;
  
        if (!dquot_active(inode)) {
-               inode_reclaim_rsv_space(inode, number);
+               spin_lock(&inode->i_lock);
+               *inode_reserved_space(inode) += number;
+               __inode_sub_bytes(inode, number);
+               spin_unlock(&inode->i_lock);
                return;
        }
  
        dquots = i_dquot(inode);
        index = srcu_read_lock(&dquot_srcu);
-       spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        /* Claim reserved quotas to allocated quotas */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               if (dquots[cnt])
-                       dquot_reclaim_reserved_space(dquots[cnt], number);
+               if (dquots[cnt]) {
+                       struct dquot *dquot = dquots[cnt];
+                       spin_lock(&dquot->dq_dqb_lock);
+                       if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
+                               number = dquot->dq_dqb.dqb_curspace;
+                       dquot->dq_dqb.dqb_rsvspace += number;
+                       dquot->dq_dqb.dqb_curspace -= number;
+                       spin_unlock(&dquot->dq_dqb_lock);
+               }
        }
        /* Update inode bytes */
-       inode_reclaim_rsv_space(inode, number);
-       spin_unlock(&dq_data_lock);
+       *inode_reserved_space(inode) += number;
+       __inode_sub_bytes(inode, number);
+       spin_unlock(&inode->i_lock);
        mark_all_dquot_dirty(dquots);
        srcu_read_unlock(&dquot_srcu, index);
        return;
@@@ -1836,19 -1820,26 +1829,26 @@@ void __dquot_free_space(struct inode *i
        int reserve = flags & DQUOT_SPACE_RESERVE, index;
  
        if (!dquot_active(inode)) {
-               inode_decr_space(inode, number, reserve);
+               if (reserve) {
+                       spin_lock(&inode->i_lock);
+                       *inode_reserved_space(inode) -= number;
+                       spin_unlock(&inode->i_lock);
+               } else {
+                       inode_sub_bytes(inode, number);
+               }
                return;
        }
  
        dquots = i_dquot(inode);
        index = srcu_read_lock(&dquot_srcu);
-       spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                int wtype;
  
                warn[cnt].w_type = QUOTA_NL_NOWARN;
                if (!dquots[cnt])
                        continue;
+               spin_lock(&dquots[cnt]->dq_dqb_lock);
                wtype = info_bdq_free(dquots[cnt], number);
                if (wtype != QUOTA_NL_NOWARN)
                        prepare_warning(&warn[cnt], dquots[cnt], wtype);
                        dquot_free_reserved_space(dquots[cnt], number);
                else
                        dquot_decr_space(dquots[cnt], number);
+               spin_unlock(&dquots[cnt]->dq_dqb_lock);
        }
-       inode_decr_space(inode, number, reserve);
-       spin_unlock(&dq_data_lock);
+       if (reserve)
+               *inode_reserved_space(inode) -= number;
+       else
+               __inode_sub_bytes(inode, number);
+       spin_unlock(&inode->i_lock);
  
        if (reserve)
                goto out_unlock;
@@@ -1884,19 -1879,21 +1888,21 @@@ void dquot_free_inode(struct inode *ino
  
        dquots = i_dquot(inode);
        index = srcu_read_lock(&dquot_srcu);
-       spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                int wtype;
  
                warn[cnt].w_type = QUOTA_NL_NOWARN;
                if (!dquots[cnt])
                        continue;
+               spin_lock(&dquots[cnt]->dq_dqb_lock);
                wtype = info_idq_free(dquots[cnt], 1);
                if (wtype != QUOTA_NL_NOWARN)
                        prepare_warning(&warn[cnt], dquots[cnt], wtype);
                dquot_decr_inodes(dquots[cnt], 1);
+               spin_unlock(&dquots[cnt]->dq_dqb_lock);
        }
-       spin_unlock(&dq_data_lock);
+       spin_unlock(&inode->i_lock);
        mark_all_dquot_dirty(dquots);
        srcu_read_unlock(&dquot_srcu, index);
        flush_warnings(warn);
@@@ -1917,7 -1914,7 +1923,7 @@@ EXPORT_SYMBOL(dquot_free_inode)
   */
  int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
  {
-       qsize_t space, cur_space;
+       qsize_t cur_space;
        qsize_t rsv_space = 0;
        qsize_t inode_usage = 1;
        struct dquot *transfer_from[MAXQUOTAS] = {};
        }
  
        spin_lock(&dq_data_lock);
+       spin_lock(&inode->i_lock);
        if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
+               spin_unlock(&inode->i_lock);
                spin_unlock(&dq_data_lock);
                return 0;
        }
-       cur_space = inode_get_bytes(inode);
-       rsv_space = inode_get_rsv_space(inode);
-       space = cur_space + rsv_space;
-       /* Build the transfer_from list and check the limits */
+       cur_space = __inode_get_bytes(inode);
+       rsv_space = __inode_get_rsv_space(inode);
+       /*
+        * Build the transfer_from list, check limits, and update usage in
+        * the target structures.
+        */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                /*
                 * Skip changes for same uid or gid or for turned off quota-type.
                        continue;
                is_valid[cnt] = 1;
                transfer_from[cnt] = i_dquot(inode)[cnt];
-               ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]);
+               ret = dquot_add_inodes(transfer_to[cnt], inode_usage,
+                                      &warn_to[cnt]);
                if (ret)
                        goto over_quota;
-               ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
-               if (ret)
+               ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
+                                     &warn_to[cnt]);
+               if (ret) {
+                       dquot_decr_inodes(transfer_to[cnt], inode_usage);
                        goto over_quota;
+               }
        }
  
-       /*
-        * Finally perform the needed transfer from transfer_from to transfer_to
-        */
+       /* Decrease usage for source structures and update quota pointers */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!is_valid[cnt])
                        continue;
                /* Due to IO error we might not have transfer_from[] structure */
                if (transfer_from[cnt]) {
                        int wtype;
+                       spin_lock(&transfer_from[cnt]->dq_dqb_lock);
                        wtype = info_idq_free(transfer_from[cnt], inode_usage);
                        if (wtype != QUOTA_NL_NOWARN)
                                prepare_warning(&warn_from_inodes[cnt],
                                                transfer_from[cnt], wtype);
-                       wtype = info_bdq_free(transfer_from[cnt], space);
+                       wtype = info_bdq_free(transfer_from[cnt],
+                                             cur_space + rsv_space);
                        if (wtype != QUOTA_NL_NOWARN)
                                prepare_warning(&warn_from_space[cnt],
                                                transfer_from[cnt], wtype);
                        dquot_decr_space(transfer_from[cnt], cur_space);
                        dquot_free_reserved_space(transfer_from[cnt],
                                                  rsv_space);
+                       spin_unlock(&transfer_from[cnt]->dq_dqb_lock);
                }
-               dquot_incr_inodes(transfer_to[cnt], inode_usage);
-               dquot_incr_space(transfer_to[cnt], cur_space);
-               dquot_resv_space(transfer_to[cnt], rsv_space);
                i_dquot(inode)[cnt] = transfer_to[cnt];
        }
+       spin_unlock(&inode->i_lock);
        spin_unlock(&dq_data_lock);
  
        mark_all_dquot_dirty(transfer_from);
                        transfer_to[cnt] = transfer_from[cnt];
        return 0;
  over_quota:
+       /* Back out changes we already did */
+       for (cnt--; cnt >= 0; cnt--) {
+               if (!is_valid[cnt])
+                       continue;
+               spin_lock(&transfer_to[cnt]->dq_dqb_lock);
+               dquot_decr_inodes(transfer_to[cnt], inode_usage);
+               dquot_decr_space(transfer_to[cnt], cur_space);
+               dquot_free_reserved_space(transfer_to[cnt], rsv_space);
+               spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
+       }
+       spin_unlock(&inode->i_lock);
        spin_unlock(&dq_data_lock);
        flush_warnings(warn_to);
        return ret;
@@@ -2066,29 -2080,21 +2089,21 @@@ EXPORT_SYMBOL(dquot_transfer)
   */
  int dquot_commit_info(struct super_block *sb, int type)
  {
-       int ret;
        struct quota_info *dqopt = sb_dqopt(sb);
  
-       mutex_lock(&dqopt->dqio_mutex);
-       ret = dqopt->ops[type]->write_file_info(sb, type);
-       mutex_unlock(&dqopt->dqio_mutex);
-       return ret;
+       return dqopt->ops[type]->write_file_info(sb, type);
  }
  EXPORT_SYMBOL(dquot_commit_info);
  
  int dquot_get_next_id(struct super_block *sb, struct kqid *qid)
  {
        struct quota_info *dqopt = sb_dqopt(sb);
-       int err;
  
        if (!sb_has_quota_active(sb, qid->type))
                return -ESRCH;
        if (!dqopt->ops[qid->type]->get_next_id)
                return -ENOSYS;
-       mutex_lock(&dqopt->dqio_mutex);
-       err = dqopt->ops[qid->type]->get_next_id(sb, qid);
-       mutex_unlock(&dqopt->dqio_mutex);
-       return err;
+       return dqopt->ops[qid->type]->get_next_id(sb, qid);
  }
  EXPORT_SYMBOL(dquot_get_next_id);
  
@@@ -2337,15 -2343,14 +2352,14 @@@ static int vfs_load_quota_inode(struct 
        dqopt->info[type].dqi_format = fmt;
        dqopt->info[type].dqi_fmt_id = format_id;
        INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
-       mutex_lock(&dqopt->dqio_mutex);
        error = dqopt->ops[type]->read_file_info(sb, type);
-       if (error < 0) {
-               mutex_unlock(&dqopt->dqio_mutex);
+       if (error < 0)
                goto out_file_init;
-       }
-       if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+       if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) {
+               spin_lock(&dq_data_lock);
                dqopt->info[type].dqi_flags |= DQF_SYS_FILE;
-       mutex_unlock(&dqopt->dqio_mutex);
+               spin_unlock(&dq_data_lock);
+       }
        spin_lock(&dq_state_lock);
        dqopt->flags |= dquot_state_flag(flags, type);
        spin_unlock(&dq_state_lock);
@@@ -2572,7 -2577,7 +2586,7 @@@ static void do_get_dqblk(struct dquot *
        struct mem_dqblk *dm = &dquot->dq_dqb;
  
        memset(di, 0, sizeof(*di));
-       spin_lock(&dq_data_lock);
+       spin_lock(&dquot->dq_dqb_lock);
        di->d_spc_hardlimit = dm->dqb_bhardlimit;
        di->d_spc_softlimit = dm->dqb_bsoftlimit;
        di->d_ino_hardlimit = dm->dqb_ihardlimit;
        di->d_ino_count = dm->dqb_curinodes;
        di->d_spc_timer = dm->dqb_btime;
        di->d_ino_timer = dm->dqb_itime;
-       spin_unlock(&dq_data_lock);
+       spin_unlock(&dquot->dq_dqb_lock);
  }
  
  int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
@@@ -2645,7 -2650,7 +2659,7 @@@ static int do_set_dqblk(struct dquot *d
             (di->d_ino_hardlimit > dqi->dqi_max_ino_limit)))
                return -ERANGE;
  
-       spin_lock(&dq_data_lock);
+       spin_lock(&dquot->dq_dqb_lock);
        if (di->d_fieldmask & QC_SPACE) {
                dm->dqb_curspace = di->d_space - dm->dqb_rsvspace;
                check_blim = 1;
  
        if (check_blim) {
                if (!dm->dqb_bsoftlimit ||
 -                  dm->dqb_curspace < dm->dqb_bsoftlimit) {
 +                  dm->dqb_curspace + dm->dqb_rsvspace < dm->dqb_bsoftlimit) {
                        dm->dqb_btime = 0;
                        clear_bit(DQ_BLKS_B, &dquot->dq_flags);
                } else if (!(di->d_fieldmask & QC_SPC_TIMER))
                clear_bit(DQ_FAKE_B, &dquot->dq_flags);
        else
                set_bit(DQ_FAKE_B, &dquot->dq_flags);
-       spin_unlock(&dq_data_lock);
+       spin_unlock(&dquot->dq_dqb_lock);
        mark_dquot_dirty(dquot);
  
        return 0;
diff --combined fs/quota/quota_v2.c
index 2259329616b7ed04712cf4da698c3fe1ec137e83,cdbf71664cdce08f66010d06f0204ee0a40ad50d..c0187cda2c1ed3ff65b449a74aa71c05ca29eb58
@@@ -65,9 -65,11 +65,11 @@@ static int v2_read_header(struct super_
        if (size != sizeof(struct v2_disk_dqheader)) {
                quota_error(sb, "Failed header read: expected=%zd got=%zd",
                            sizeof(struct v2_disk_dqheader), size);
-               return 0;
+               if (size < 0)
+                       return size;
+               return -EIO;
        }
-       return 1;
+       return 0;
  }
  
  /* Check whether given file is really vfsv0 quotafile */
@@@ -77,7 -79,7 +79,7 @@@ static int v2_check_quota_file(struct s
        static const uint quota_magics[] = V2_INITQMAGICS;
        static const uint quota_versions[] = V2_INITQVERSIONS;
   
-       if (!v2_read_header(sb, type, &dqhead))
+       if (v2_read_header(sb, type, &dqhead))
                return 0;
        if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
            le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
@@@ -90,28 -92,41 +92,39 @@@ static int v2_read_file_info(struct sup
  {
        struct v2_disk_dqinfo dinfo;
        struct v2_disk_dqheader dqhead;
-       struct mem_dqinfo *info = sb_dqinfo(sb, type);
+       struct quota_info *dqopt = sb_dqopt(sb);
+       struct mem_dqinfo *info = &dqopt->info[type];
        struct qtree_mem_dqinfo *qinfo;
        ssize_t size;
        unsigned int version;
+       int ret;
  
-       if (!v2_read_header(sb, type, &dqhead))
-               return -1;
+       down_read(&dqopt->dqio_sem);
+       ret = v2_read_header(sb, type, &dqhead);
+       if (ret < 0)
+               goto out;
        version = le32_to_cpu(dqhead.dqh_version);
        if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
-           (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
-               return -1;
+           (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1)) {
+               ret = -EINVAL;
+               goto out;
+       }
  
        size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
               sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
        if (size != sizeof(struct v2_disk_dqinfo)) {
                quota_error(sb, "Can't read info structure");
-               return -1;
+               if (size < 0)
+                       ret = size;
+               else
+                       ret = -EIO;
+               goto out;
        }
        info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
-       if (!info->dqi_priv)
-               return -ENOMEM;
+       if (!info->dqi_priv) {
 -              printk(KERN_WARNING
 -                     "Not enough memory for quota information structure.\n");
+               ret = -ENOMEM;
+               goto out;
+       }
        qinfo = info->dqi_priv;
        if (version == 0) {
                /* limits are stored as unsigned 32-bit data */
                qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
                qinfo->dqi_ops = &v2r1_qtree_ops;
        }
-       return 0;
+       ret = 0;
+ out:
+       up_read(&dqopt->dqio_sem);
+       return ret;
  }
  
  /* Write information header to quota file */
  static int v2_write_file_info(struct super_block *sb, int type)
  {
        struct v2_disk_dqinfo dinfo;
-       struct mem_dqinfo *info = sb_dqinfo(sb, type);
+       struct quota_info *dqopt = sb_dqopt(sb);
+       struct mem_dqinfo *info = &dqopt->info[type];
        struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
        ssize_t size;
  
+       down_write(&dqopt->dqio_sem);
        spin_lock(&dq_data_lock);
        info->dqi_flags &= ~DQF_INFO_DIRTY;
        dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
        dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
        size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
               sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+       up_write(&dqopt->dqio_sem);
        if (size != sizeof(struct v2_disk_dqinfo)) {
                quota_error(sb, "Can't write info structure");
                return -1;
@@@ -283,17 -304,51 +302,51 @@@ static int v2r1_is_id(void *dp, struct 
  
  static int v2_read_dquot(struct dquot *dquot)
  {
-       return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+       int ret;
+       down_read(&dqopt->dqio_sem);
+       ret = qtree_read_dquot(
+                       sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
+                       dquot);
+       up_read(&dqopt->dqio_sem);
+       return ret;
  }
  
  static int v2_write_dquot(struct dquot *dquot)
  {
-       return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+       int ret;
+       bool alloc = false;
+       /*
+        * If space for dquot is already allocated, we don't need any
+        * protection as we'll only overwrite the place of dquot. We are
+        * still protected by concurrent writes of the same dquot by
+        * dquot->dq_lock.
+        */
+       if (!dquot->dq_off) {
+               alloc = true;
+               down_write(&dqopt->dqio_sem);
+       }
+       ret = qtree_write_dquot(
+                       sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
+                       dquot);
+       if (alloc)
+               up_write(&dqopt->dqio_sem);
+       return ret;
  }
  
  static int v2_release_dquot(struct dquot *dquot)
  {
-       return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+       int ret;
+       down_write(&dqopt->dqio_sem);
+       ret = qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+       up_write(&dqopt->dqio_sem);
+       return ret;
  }
  
  static int v2_free_file_info(struct super_block *sb, int type)
  
  static int v2_get_next_id(struct super_block *sb, struct kqid *qid)
  {
-       return qtree_get_next_id(sb_dqinfo(sb, qid->type)->dqi_priv, qid);
+       struct quota_info *dqopt = sb_dqopt(sb);
+       int ret;
+       down_read(&dqopt->dqio_sem);
+       ret = qtree_get_next_id(sb_dqinfo(sb, qid->type)->dqi_priv, qid);
+       up_read(&dqopt->dqio_sem);
+       return ret;
  }
  
  static const struct quota_format_ops v2_format_ops = {
diff --combined include/linux/fs.h
index 7d6079dceb39158b28a63714fd32e391ea327746,d6e9ab7f184f6b20b0ac57a902a8eefe26d57689..509434aaf5a46f11479db99c7654992f58dbede4
@@@ -72,8 -72,6 +72,8 @@@ extern int leases_enable, lease_break_t
  extern int sysctl_protected_symlinks;
  extern int sysctl_protected_hardlinks;
  
 +typedef __kernel_rwf_t rwf_t;
 +
  struct buffer_head;
  typedef int (get_block_t)(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create);
@@@ -429,7 -427,6 +429,7 @@@ struct block_device 
  #endif
        struct block_device *   bd_contains;
        unsigned                bd_block_size;
 +      u8                      bd_partno;
        struct hd_struct *      bd_part;
        /* number of times partitions within this device have been opened. */
        unsigned                bd_part_count;
@@@ -910,9 -907,9 +910,9 @@@ static inline struct file *get_file(str
  /* Page cache limit. The filesystems should put that into their s_maxbytes 
     limits, otherwise bad things can happen in VM. */ 
  #if BITS_PER_LONG==32
 -#define MAX_LFS_FILESIZE      (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 +#define MAX_LFS_FILESIZE      ((loff_t)ULONG_MAX << PAGE_SHIFT)
  #elif BITS_PER_LONG==64
 -#define MAX_LFS_FILESIZE      ((loff_t)0x7fffffffffffffffLL)
 +#define MAX_LFS_FILESIZE      ((loff_t)LLONG_MAX)
  #endif
  
  #define FL_POSIX      1
@@@ -1003,6 -1000,7 +1003,6 @@@ struct file_lock 
        unsigned char fl_type;
        unsigned int fl_pid;
        int fl_link_cpu;                /* what cpu's list is this on? */
 -      struct pid *fl_nspid;
        wait_queue_head_t fl_wait;
        struct file *fl_file;
        loff_t fl_start;
@@@ -1270,6 -1268,8 +1270,6 @@@ extern void f_delown(struct file *filp)
  extern pid_t f_getown(struct file *filp);
  extern int send_sigurg(struct fown_struct *fown);
  
 -struct mm_struct;
 -
  /*
   *    Umount options
   */
@@@ -1758,9 -1758,9 +1758,9 @@@ extern ssize_t __vfs_write(struct file 
  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
  extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 -              unsigned long, loff_t *, int);
 +              unsigned long, loff_t *, rwf_t);
  extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
 -              unsigned long, loff_t *, int);
 +              unsigned long, loff_t *, rwf_t);
  extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
                                   loff_t, size_t, unsigned int);
  extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
@@@ -2471,13 -2471,9 +2471,13 @@@ static inline void bd_unlink_disk_holde
  #endif
  
  /* fs/char_dev.c */
 -#define CHRDEV_MAJOR_HASH_SIZE        255
 +#define CHRDEV_MAJOR_MAX 512
  /* Marks the bottom of the first segment of free char majors */
  #define CHRDEV_MAJOR_DYN_END 234
 +/* Marks the top and bottom of the second segment of free char majors */
 +#define CHRDEV_MAJOR_DYN_EXT_START 511
 +#define CHRDEV_MAJOR_DYN_EXT_END 384
 +
  extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
  extern int register_chrdev_region(dev_t, unsigned, const char *);
  extern int __register_chrdev(unsigned int major, unsigned int baseminor,
@@@ -2504,14 -2500,14 +2504,14 @@@ static inline void unregister_chrdev(un
  #define BDEVT_SIZE    10      /* Largest string for MAJ:MIN for blkdev */
  
  #ifdef CONFIG_BLOCK
 -#define BLKDEV_MAJOR_HASH_SIZE        255
 +#define BLKDEV_MAJOR_MAX      512
  extern const char *__bdevname(dev_t, char *buffer);
  extern const char *bdevname(struct block_device *bdev, char *buffer);
  extern struct block_device *lookup_bdev(const char *);
  extern void blkdev_show(struct seq_file *,off_t);
  
  #else
 -#define BLKDEV_MAJOR_HASH_SIZE        0
 +#define BLKDEV_MAJOR_MAX      0
  #endif
  
  extern void init_special_inode(struct inode *, umode_t, dev_t);
@@@ -2543,19 -2539,12 +2543,19 @@@ extern int invalidate_inode_pages2_rang
  extern int write_inode_now(struct inode *, int);
  extern int filemap_fdatawrite(struct address_space *);
  extern int filemap_flush(struct address_space *);
 -extern int filemap_fdatawait(struct address_space *);
  extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
  extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
                                   loff_t lend);
 +
 +static inline int filemap_fdatawait(struct address_space *mapping)
 +{
 +      return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
 +}
 +
  extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
                                  loff_t lend);
 +extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
 +                                              loff_t lend);
  extern int filemap_write_and_wait(struct address_space *mapping);
  extern int filemap_write_and_wait_range(struct address_space *mapping,
                                        loff_t lstart, loff_t lend);
@@@ -2564,19 -2553,12 +2564,19 @@@ extern int __filemap_fdatawrite_range(s
  extern int filemap_fdatawrite_range(struct address_space *mapping,
                                loff_t start, loff_t end);
  extern int filemap_check_errors(struct address_space *mapping);
 -
  extern void __filemap_set_wb_err(struct address_space *mapping, int err);
 +
 +extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
 +                                              loff_t lend);
  extern int __must_check file_check_and_advance_wb_err(struct file *file);
  extern int __must_check file_write_and_wait_range(struct file *file,
                                                loff_t start, loff_t end);
  
 +static inline int file_write_and_wait(struct file *file)
 +{
 +      return file_write_and_wait_range(file, 0, LLONG_MAX);
 +}
 +
  /**
   * filemap_set_wb_err - set a writeback error on an address_space
   * @mapping: mapping in which to set writeback error
   * When a writeback error occurs, most filesystems will want to call
   * filemap_set_wb_err to record the error in the mapping so that it will be
   * automatically reported whenever fsync is called on the file.
 - *
 - * FIXME: mention FS_* flag here?
   */
  static inline void filemap_set_wb_err(struct address_space *mapping, int err)
  {
@@@ -2847,7 -2831,6 +2847,7 @@@ static inline void lockdep_annotate_ino
  #endif
  extern void unlock_new_inode(struct inode *);
  extern unsigned int get_next_ino(void);
 +extern void evict_inodes(struct super_block *sb);
  
  extern void __iget(struct inode * inode);
  extern void iget_failed(struct inode *);
@@@ -2891,9 -2874,9 +2891,9 @@@ extern ssize_t generic_file_direct_writ
  extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
  
  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 -              int flags);
 +              rwf_t flags);
  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 -              int flags);
 +              rwf_t flags);
  
  /* fs/block_dev.c */
  extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
@@@ -3015,6 -2998,10 +3015,10 @@@ void __inode_add_bytes(struct inode *in
  void inode_add_bytes(struct inode *inode, loff_t bytes);
  void __inode_sub_bytes(struct inode *inode, loff_t bytes);
  void inode_sub_bytes(struct inode *inode, loff_t bytes);
+ static inline loff_t __inode_get_bytes(struct inode *inode)
+ {
+       return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
+ }
  loff_t inode_get_bytes(struct inode *inode);
  void inode_set_bytes(struct inode *inode, loff_t bytes);
  const char *simple_get_link(struct dentry *, struct inode *,
@@@ -3160,7 -3147,7 +3164,7 @@@ static inline int iocb_flags(struct fil
        return res;
  }
  
 -static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
 +static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
  {
        if (unlikely(flags & ~RWF_SUPPORTED))
                return -EOPNOTSUPP;