NFS: Count the bytes of skipped subrequests in nfs_lock_and_join_requests()
[sfrench/cifs-2.6.git] fs/nfs/write.c

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b1af5dee5e0a87fdbd370bfdd9277d331aa4c721..f68083db63c8a7735b9eeff4606dc9a71bac55e3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -102,10 +102,8 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
 {
        struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-       if (p) {
-               memset(p, 0, sizeof(*p));
-               p->rw_mode = FMODE_WRITE;
-       }
+       memset(p, 0, sizeof(*p));
+       p->rw_mode = FMODE_WRITE;
        return p;
 }
 
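The hunk above can drop the NULL check because mempool_alloc() cannot fail when the gfp mask allows the allocator to sleep, and GFP_NOIO does: if the underlying allocation fails, the caller simply blocks until another writer returns an element to the reserved pool. A minimal userspace model of that guarantee, with hypothetical names (this is not the kernel implementation):

    #include <pthread.h>
    #include <stdlib.h>

    #define RESERVE 8

    struct mempool_model {
        pthread_mutex_t lock;
        pthread_cond_t  wait;
        void  *reserved[RESERVE];   /* pre-allocated emergency elements */
        int    navail;
        size_t elem_size;
    };

    void *mempool_alloc_model(struct mempool_model *p)
    {
        void *elem = malloc(p->elem_size);   /* fast path */

        if (elem)
            return elem;
        pthread_mutex_lock(&p->lock);
        while (p->navail == 0)               /* sleep until an element returns */
            pthread_cond_wait(&p->wait, &p->lock);
        elem = p->reserved[--p->navail];
        pthread_mutex_unlock(&p->lock);
        return elem;                         /* never NULL, by construction */
    }

    void mempool_free_model(struct mempool_model *p, void *elem)
    {
        pthread_mutex_lock(&p->lock);
        if (p->navail < RESERVE) {           /* top up the reserve first */
            p->reserved[p->navail++] = elem;
            pthread_cond_signal(&p->wait);
            elem = NULL;
        }
        pthread_mutex_unlock(&p->lock);
        free(elem);                          /* no-op if the element was pooled */
    }
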
@@ -154,6 +152,14 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
        set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 }
 
+static struct nfs_page *
+nfs_page_private_request(struct page *page)
+{
+       if (!PagePrivate(page))
+               return NULL;
+       return (struct nfs_page *)page_private(page);
+}
+
 /*
  * nfs_page_find_head_request_locked - find head request associated with @page
  *
@@ -162,21 +168,41 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
  * returns matching head request with reference held, or NULL if not found.
  */
 static struct nfs_page *
-nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_private_request(struct page *page)
 {
-       struct nfs_page *req = NULL;
-
-       if (PagePrivate(page))
-               req = (struct nfs_page *)page_private(page);
-       else if (unlikely(PageSwapCache(page)))
-               req = nfs_page_search_commits_for_head_request_locked(nfsi,
-                       page);
+       struct address_space *mapping = page_file_mapping(page);
+       struct nfs_page *req;
 
+       if (!PagePrivate(page))
+               return NULL;
+       spin_lock(&mapping->private_lock);
+       req = nfs_page_private_request(page);
        if (req) {
                WARN_ON_ONCE(req->wb_head != req);
                kref_get(&req->wb_kref);
        }
+       spin_unlock(&mapping->private_lock);
+       return req;
+}
 
+static struct nfs_page *
+nfs_page_find_swap_request(struct page *page)
+{
+       struct inode *inode = page_file_mapping(page)->host;
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_page *req = NULL;
+       if (!PageSwapCache(page))
+               return NULL;
+       mutex_lock(&nfsi->commit_mutex);
+       if (PageSwapCache(page)) {
+               req = nfs_page_search_commits_for_head_request_locked(nfsi,
+                       page);
+               if (req) {
+                       WARN_ON_ONCE(req->wb_head != req);
+                       kref_get(&req->wb_kref);
+               }
+       }
+       mutex_unlock(&nfsi->commit_mutex);
        return req;
 }
 
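The old nfs_page_find_head_request_locked() required the inode spinlock; the two helpers above split the lookup by page type and pin the request under mapping->private_lock (page cache) or the per-inode commit_mutex (swap cache) instead. The load-bearing detail in both is that the kref is taken before the lock is dropped, so the pointer read from page_private() cannot be freed out from under the caller. A sketch of the pattern, assuming hypothetical userspace types:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    struct request_model {
        atomic_int refcount;
    };

    struct page_model {
        pthread_mutex_t       private_lock;
        struct request_model *private_req;    /* what page_private() returns */
    };

    struct request_model *find_private_request(struct page_model *page)
    {
        struct request_model *req;

        pthread_mutex_lock(&page->private_lock);
        req = page->private_req;
        if (req)
            atomic_fetch_add(&req->refcount, 1);  /* pin before unlocking */
        pthread_mutex_unlock(&page->private_lock);
        return req;   /* caller now owns a reference, or got NULL */
    }
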
@@ -187,12 +213,11 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
  */
 static struct nfs_page *nfs_page_find_head_request(struct page *page)
 {
-       struct inode *inode = page_file_mapping(page)->host;
-       struct nfs_page *req = NULL;
+       struct nfs_page *req;
 
-       spin_lock(&inode->i_lock);
-       req = nfs_page_find_head_request_locked(NFS_I(inode), page);
-       spin_unlock(&inode->i_lock);
+       req = nfs_page_find_private_request(page);
+       if (!req)
+               req = nfs_page_find_swap_request(page);
        return req;
 }
 
@@ -241,9 +266,6 @@ nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
 {
        struct nfs_page *req;
 
-       WARN_ON_ONCE(head != head->wb_head);
-       WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
-
        req = head;
        do {
                if (page_offset >= req->wb_pgbase &&
@@ -269,20 +291,17 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
        unsigned int pos = 0;
        unsigned int len = nfs_page_length(req->wb_page);
 
-       nfs_page_group_lock(req, false);
+       nfs_page_group_lock(req);
 
-       do {
+       for (;;) {
                tmp = nfs_page_group_search_locked(req->wb_head, pos);
-               if (tmp) {
-                       /* no way this should happen */
-                       WARN_ON_ONCE(tmp->wb_pgbase != pos);
-                       pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
-               }
-       } while (tmp && pos < len);
+               if (!tmp)
+                       break;
+               pos = tmp->wb_pgbase + tmp->wb_bytes;
+       }
 
        nfs_page_group_unlock(req);
-       WARN_ON_ONCE(pos > len);
-       return pos == len;
+       return pos >= len;
 }
 
 /* We can set the PG_uptodate flag if we see that a write request
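
With the WARN_ONs gone, the loop above reads as plain interval arithmetic: subrequests in a group are sorted and contiguous, so repeatedly jumping pos to the end of whichever region contains it either reaches len (the page is fully covered) or stops at a hole. A standalone model, with a hypothetical search() standing in for nfs_page_group_search_locked() and regions assumed non-empty:

    #include <stdbool.h>
    #include <stddef.h>

    struct subreq_model { unsigned int pgbase, bytes; };   /* bytes > 0 */

    /* first region containing offset pos, or NULL */
    static const struct subreq_model *
    search(const struct subreq_model *v, int n, unsigned int pos)
    {
        for (int i = 0; i < n; i++)
            if (pos >= v[i].pgbase && pos < v[i].pgbase + v[i].bytes)
                return &v[i];
        return NULL;
    }

    bool group_covers(const struct subreq_model *v, int n, unsigned int len)
    {
        unsigned int pos = 0;

        for (;;) {
            const struct subreq_model *tmp = search(v, n, pos);

            if (!tmp)
                break;                          /* hole at pos */
            pos = tmp->pgbase + tmp->bytes;     /* jump past this region */
        }
        return pos >= len;
    }
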
@@ -333,8 +352,11 @@ static void nfs_end_page_writeback(struct nfs_page *req)
 {
        struct inode *inode = page_file_mapping(req->wb_page)->host;
        struct nfs_server *nfss = NFS_SERVER(inode);
+       bool is_done;
 
-       if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+       is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
+       nfs_unlock_request(req);
+       if (!is_done)
                return;
 
        end_page_writeback(req->wb_page);
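
Note the reordering above: the request is now unlocked before the group-done test. nfs_page_group_sync_on_bit() returns true only for the last member of the group to set the given bit, which is what makes it safe to call end_page_writeback() exactly once per page. Those semantics can be modelled with a simple countdown (a hypothetical analogue, not the kernel's bit-based implementation):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct group_model {
        atomic_int remaining;   /* members that have not yet reported in */
    };

    /* true only for the call that retires the last outstanding member */
    bool group_member_done(struct group_model *g)
    {
        return atomic_fetch_sub(&g->remaining, 1) == 1;
    }
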
@@ -342,22 +364,6 @@ static void nfs_end_page_writeback(struct nfs_page *req)
                clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 }
 
-
-/* nfs_page_group_clear_bits
- *   @req - an nfs request
- * clears all page group related bits from @req
- */
-static void
-nfs_page_group_clear_bits(struct nfs_page *req)
-{
-       clear_bit(PG_TEARDOWN, &req->wb_flags);
-       clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
-       clear_bit(PG_UPTODATE, &req->wb_flags);
-       clear_bit(PG_WB_END, &req->wb_flags);
-       clear_bit(PG_REMOVE, &req->wb_flags);
-}
-
-
 /*
  * nfs_unroll_locks_and_wait -  unlock all newly locked reqs and wait on @req
  *
@@ -366,43 +372,24 @@ nfs_page_group_clear_bits(struct nfs_page *req)
  * @inode - inode associated with request page group, must be holding inode lock
  * @head  - head request of page group, must be holding head lock
  * @req   - request that couldn't lock and needs to wait on the req bit lock
- * @nonblock - if true, don't actually wait
  *
- * NOTE: this must be called holding page_group bit lock and inode spin lock
- *       and BOTH will be released before returning.
+ * NOTE: this must be called holding page_group bit lock
+ *       which will be released before returning.
  *
  * returns 0 on success, < 0 on error.
  */
-static int
-nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
-                         struct nfs_page *req, bool nonblock)
-       __releases(&inode->i_lock)
+static void
+nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
+                         struct nfs_page *req)
 {
        struct nfs_page *tmp;
-       int ret;
 
        /* relinquish all the locks successfully grabbed this run */
-       for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
-               nfs_unlock_request(tmp);
-
-       WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
-
-       /* grab a ref on the request that will be waited on */
-       kref_get(&req->wb_kref);
-
-       nfs_page_group_unlock(head);
-       spin_unlock(&inode->i_lock);
-
-       /* release ref from nfs_page_find_head_request_locked */
-       nfs_release_request(head);
-
-       if (!nonblock)
-               ret = nfs_wait_on_request(req);
-       else
-               ret = -EAGAIN;
-       nfs_release_request(req);
-
-       return ret;
+       for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
+               if (!kref_read(&tmp->wb_kref))
+                       continue;
+               nfs_unlock_and_release_request(tmp);
+       }
 }
 
 /*
@@ -417,7 +404,8 @@ nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
  */
 static void
 nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
-                                struct nfs_page *old_head)
+                                struct nfs_page *old_head,
+                                struct inode *inode)
 {
        while (destroy_list) {
                struct nfs_page *subreq = destroy_list;
@@ -428,33 +416,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
                WARN_ON_ONCE(old_head != subreq->wb_head);
 
                /* make sure old group is not used */
-               subreq->wb_head = subreq;
                subreq->wb_this_page = subreq;
 
-               /* subreq is now totally disconnected from page group or any
-                * write / commit lists. last chance to wake any waiters */
-               nfs_unlock_request(subreq);
+               clear_bit(PG_REMOVE, &subreq->wb_flags);
 
-               if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
-                       /* release ref on old head request */
-                       nfs_release_request(old_head);
+               /* Note: races with nfs_page_group_destroy() */
+               if (!kref_read(&subreq->wb_kref)) {
+                       /* Check if we raced with nfs_page_group_destroy() */
+                       if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
+                               nfs_free_request(subreq);
+                       continue;
+               }
 
-                       nfs_page_group_clear_bits(subreq);
+               subreq->wb_head = subreq;
 
-                       /* release the PG_INODE_REF reference */
-                       if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
-                               nfs_release_request(subreq);
-                       else
-                               WARN_ON_ONCE(1);
-               } else {
-                       WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
-                       /* zombie requests have already released the last
-                        * reference and were waiting on the rest of the
-                        * group to complete. Since it's no longer part of a
-                        * group, simply free the request */
-                       nfs_page_group_clear_bits(subreq);
-                       nfs_free_request(subreq);
+               if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
+                       nfs_release_request(subreq);
+                       atomic_long_dec(&NFS_I(inode)->nrequests);
                }
+
+               /* subreq is now totally disconnected from page group or any
+                * write / commit lists. last chance to wake any waiters */
+               nfs_unlock_and_release_request(subreq);
        }
 }
 
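The destroy-list walk above has to tolerate subrequests whose refcount already hit zero but whose memory is still pinned by the group (the nfs_page_group_destroy() race flagged in the comment). That is safe only because a zero refcount is never resurrected; the join loop later in this patch leans on kref_get_unless_zero() for exactly that reason. Its semantics are a compare-and-swap loop along these lines (a sketch, not the kernel source):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* take a reference only if the object is still alive */
    bool ref_get_unless_zero(atomic_int *refcount)
    {
        int old = atomic_load(refcount);

        while (old != 0) {
            if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                return true;    /* reference taken */
        }
        return false;   /* count already reached zero: object is dying */
    }
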
@@ -464,7 +447,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
  *                              operations for this page.
  *
  * @page - the page used to lookup the "page group" of nfs_page structures
- * @nonblock - if true, don't block waiting for request locks
  *
  * This function joins all sub requests to the head request by first
  * locking all requests in the group, cancelling any pending operations
@@ -478,7 +460,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
  * error was encountered.
  */
 static struct nfs_page *
-nfs_lock_and_join_requests(struct page *page, bool nonblock)
+nfs_lock_and_join_requests(struct page *page)
 {
        struct inode *inode = page_file_mapping(page)->host;
        struct nfs_page *head, *subreq;
@@ -487,43 +469,63 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock)
        int ret;
 
 try_again:
-       total_bytes = 0;
-
-       WARN_ON_ONCE(destroy_list);
-
-       spin_lock(&inode->i_lock);
-
        /*
         * A reference is taken only on the head request which acts as a
         * reference to the whole page group - the group will not be destroyed
         * until the head reference is released.
         */
-       head = nfs_page_find_head_request_locked(NFS_I(inode), page);
-
-       if (!head) {
-               spin_unlock(&inode->i_lock);
+       head = nfs_page_find_head_request(page);
+       if (!head)
                return NULL;
-       }
 
-       /* holding inode lock, so always make a non-blocking call to try the
-        * page group lock */
-       ret = nfs_page_group_lock(head, true);
-       if (ret < 0) {
-               spin_unlock(&inode->i_lock);
+       /* lock the page head first in order to avoid an ABBA inefficiency */
+       if (!nfs_lock_request(head)) {
+               ret = nfs_wait_on_request(head);
+               nfs_release_request(head);
+               if (ret < 0)
+                       return ERR_PTR(ret);
+               goto try_again;
+       }
 
-               if (!nonblock && ret == -EAGAIN) {
-                       nfs_page_group_lock_wait(head);
-                       nfs_release_request(head);
-                       goto try_again;
-               }
+       /* Ensure that nobody removed the request before we locked it */
+       if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
+               nfs_unlock_and_release_request(head);
+               goto try_again;
+       }
 
-               nfs_release_request(head);
+       ret = nfs_page_group_lock(head);
+       if (ret < 0) {
+               nfs_unlock_and_release_request(head);
                return ERR_PTR(ret);
        }
 
        /* lock each request in the page group */
-       subreq = head;
-       do {
+       total_bytes = head->wb_bytes;
+       for (subreq = head->wb_this_page; subreq != head;
+                       subreq = subreq->wb_this_page) {
+
+               if (!kref_get_unless_zero(&subreq->wb_kref)) {
+                       if (subreq->wb_offset == head->wb_offset + total_bytes)
+                               total_bytes += subreq->wb_bytes;
+                       continue;
+               }
+
+               while (!nfs_lock_request(subreq)) {
+                       /*
+                        * Unlock page to allow nfs_page_group_sync_on_bit()
+                        * to succeed
+                        */
+                       nfs_page_group_unlock(head);
+                       ret = nfs_wait_on_request(subreq);
+                       if (!ret)
+                               ret = nfs_page_group_lock(head);
+                       if (ret < 0) {
+                               nfs_unroll_locks(inode, head, subreq);
+                               nfs_release_request(subreq);
+                               nfs_unlock_and_release_request(head);
+                               return ERR_PTR(ret);
+                       }
+               }
                /*
                 * Subrequests are always contiguous, non overlapping
                 * and in order - but may be repeated (mirrored writes).
@@ -535,24 +537,12 @@ try_again:
                            ((subreq->wb_offset + subreq->wb_bytes) >
                             (head->wb_offset + total_bytes)))) {
                        nfs_page_group_unlock(head);
-                       spin_unlock(&inode->i_lock);
+                       nfs_unroll_locks(inode, head, subreq);
+                       nfs_unlock_and_release_request(subreq);
+                       nfs_unlock_and_release_request(head);
                        return ERR_PTR(-EIO);
                }
-
-               if (!nfs_lock_request(subreq)) {
-                       /* releases page group bit lock and
-                        * inode spin lock and all references */
-                       ret = nfs_unroll_locks_and_wait(inode, head,
-                               subreq, nonblock);
-
-                       if (ret == 0)
-                               goto try_again;
-
-                       return ERR_PTR(ret);
-               }
-
-               subreq = subreq->wb_this_page;
-       } while (subreq != head);
+       }
 
        /* Now that all requests are locked, make sure they aren't on any list.
         * Commit list removal accounting is done after locks are dropped */
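
The kref_get_unless_zero() branch above is the fix named in the commit title: a subrequest that cannot be pinned because its refcount already dropped to zero is skipped, but its byte range must still extend total_bytes. Otherwise the contiguity check that follows would see a gap and wrongly return -EIO, and head->wb_bytes would come up short. A simplified standalone model of the accounting (it ignores locking and the repeated ranges that mirrored writes produce):

    struct subreq_acct {
        unsigned long offset;
        unsigned int  bytes;
        int           alive;   /* models kref_get_unless_zero() succeeding */
    };

    /* returns the joined byte count, or 0 for a discontiguous group (-EIO) */
    unsigned int join_bytes(const struct subreq_acct *head,
                            const struct subreq_acct *subs, int n)
    {
        unsigned int total_bytes = head->bytes;

        for (int i = 0; i < n; i++) {
            const struct subreq_acct *subreq = &subs[i];

            if (!subreq->alive) {
                /* the fix: count the skipped range too */
                if (subreq->offset == head->offset + total_bytes)
                    total_bytes += subreq->bytes;
                continue;
            }
            if (subreq->offset != head->offset + total_bytes)
                return 0;    /* gap: caller would return -EIO */
            total_bytes += subreq->bytes;
        }
        return total_bytes;
    }
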
@@ -573,34 +563,30 @@ try_again:
                head->wb_bytes = total_bytes;
        }
 
-       /*
-        * prepare head request to be added to new pgio descriptor
-        */
-       nfs_page_group_clear_bits(head);
-
-       /*
-        * some part of the group was still on the inode list - otherwise
-        * the group wouldn't be involved in async write.
-        * grab a reference for the head request, iff it needs one.
-        */
-       if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+       /* Postpone destruction of this request */
+       if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
+               set_bit(PG_INODE_REF, &head->wb_flags);
                kref_get(&head->wb_kref);
+               atomic_long_inc(&NFS_I(inode)->nrequests);
+       }
 
        nfs_page_group_unlock(head);
 
-       /* drop lock to clean uprequests on destroy list */
-       spin_unlock(&inode->i_lock);
+       nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
 
-       nfs_destroy_unlinked_subrequests(destroy_list, head);
+       /* Did we lose a race with nfs_inode_remove_request()? */
+       if (!(PagePrivate(page) || PageSwapCache(page))) {
+               nfs_unlock_and_release_request(head);
+               return NULL;
+       }
 
-       /* still holds ref on head from nfs_page_find_head_request_locked
+       /* still holds ref on head from nfs_page_find_head_request
         * and still has lock on head from lock loop */
        return head;
 }
 
 static void nfs_write_error_remove_page(struct nfs_page *req)
 {
-       nfs_unlock_request(req);
        nfs_end_page_writeback(req);
        generic_error_remove_page(page_file_mapping(req->wb_page),
                                  req->wb_page);
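
Two revalidation points bracket the join in nfs_lock_and_join_requests(): after taking the head lock it confirms the head is still the page's private request, and after the join it confirms the page still carries requests at all (the race with nfs_inode_remove_request()). Both are instances of the optimistic lock-and-revalidate retry loop; a compilable userspace sketch with hypothetical types, building on the find_private_request() pattern shown earlier (request freeing is elided):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    struct req_model {
        pthread_mutex_t lock;
        atomic_int      refs;
    };

    struct pg_model {
        pthread_mutex_t             private_lock;
        _Atomic(struct req_model *) private_req;
    };

    static struct req_model *find_head(struct pg_model *p)
    {
        struct req_model *r;

        pthread_mutex_lock(&p->private_lock);
        r = atomic_load(&p->private_req);
        if (r)
            atomic_fetch_add(&r->refs, 1);
        pthread_mutex_unlock(&p->private_lock);
        return r;
    }

    struct req_model *lock_and_revalidate(struct pg_model *p)
    {
        struct req_model *head;

    retry:
        head = find_head(p);
        if (!head)
            return NULL;
        pthread_mutex_lock(&head->lock);          /* nfs_lock_request() */
        if (atomic_load(&p->private_req) != head) {
            /* removed while we slept on the lock: drop and retry */
            pthread_mutex_unlock(&head->lock);
            atomic_fetch_sub(&head->refs, 1);
            goto retry;
        }
        return head;   /* locked, referenced, and still current */
    }
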
@@ -624,12 +610,12 @@ nfs_error_is_fatal_on_server(int err)
  * May return an error if the user signalled nfs_wait_on_request().
  */
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
-                               struct page *page, bool nonblock)
+                               struct page *page)
 {
        struct nfs_page *req;
        int ret = 0;
 
-       req = nfs_lock_and_join_requests(page, nonblock);
+       req = nfs_lock_and_join_requests(page);
        if (!req)
                goto out;
        ret = PTR_ERR(req);
@@ -672,7 +658,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
        int ret;
 
        nfs_pageio_cond_complete(pgio, page_index(page));
-       ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
+       ret = nfs_page_async_flush(pgio, page);
        if (ret == -EAGAIN) {
                redirty_page_for_writepage(wbc, page);
                ret = 0;
@@ -759,6 +745,7 @@ out_err:
  */
 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
+       struct address_space *mapping = page_file_mapping(req->wb_page);
        struct nfs_inode *nfsi = NFS_I(inode);
 
        WARN_ON_ONCE(req->wb_this_page != req);
@@ -766,27 +753,30 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
        /* Lock the request! */
        nfs_lock_request(req);
 
-       spin_lock(&inode->i_lock);
-       if (!nfsi->nrequests &&
-           NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
-               inode->i_version++;
        /*
         * Swap-space should not get truncated. Hence no need to plug the race
         * with invalidate/truncate.
         */
+       spin_lock(&mapping->private_lock);
+       if (!nfs_have_writebacks(inode) &&
+           NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) {
+               spin_lock(&inode->i_lock);
+               inode->i_version++;
+               spin_unlock(&inode->i_lock);
+       }
        if (likely(!PageSwapCache(req->wb_page))) {
                set_bit(PG_MAPPED, &req->wb_flags);
                SetPagePrivate(req->wb_page);
                set_page_private(req->wb_page, (unsigned long)req);
        }
-       nfsi->nrequests++;
+       spin_unlock(&mapping->private_lock);
+       atomic_long_inc(&nfsi->nrequests);
        /* this a head request for a page group - mark it as having an
         * extra reference so sub groups can follow suit.
         * This flag also informs pgio layer when to bump nrequests when
         * adding subrequests. */
        WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
        kref_get(&req->wb_kref);
-       spin_unlock(&inode->i_lock);
 }
 
 /*
@@ -794,25 +784,22 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
-       struct inode *inode = d_inode(req->wb_context->dentry);
+       struct address_space *mapping = page_file_mapping(req->wb_page);
+       struct inode *inode = mapping->host;
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page *head;
 
+       atomic_long_dec(&nfsi->nrequests);
        if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
                head = req->wb_head;
 
-               spin_lock(&inode->i_lock);
+               spin_lock(&mapping->private_lock);
                if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
                        set_page_private(head->wb_page, 0);
                        ClearPagePrivate(head->wb_page);
                        clear_bit(PG_MAPPED, &head->wb_flags);
                }
-               nfsi->nrequests--;
-               spin_unlock(&inode->i_lock);
-       } else {
-               spin_lock(&inode->i_lock);
-               nfsi->nrequests--;
-               spin_unlock(&inode->i_lock);
+               spin_unlock(&mapping->private_lock);
        }
 
        if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
@@ -868,7 +855,8 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
  * number of outstanding requests requiring a commit as well as
  * the MM page stats.
  *
- * The caller must hold cinfo->inode->i_lock, and the nfs_page lock.
+ * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
+ * nfs_page lock.
  */
 void
 nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
@@ -876,7 +864,7 @@ nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
 {
        set_bit(PG_CLEAN, &req->wb_flags);
        nfs_list_add_request(req, dst);
-       cinfo->mds->ncommit++;
+       atomic_long_inc(&cinfo->mds->ncommit);
 }
 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
 
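Here and below, the commit counters stop hiding behind a lock: ncommit (and nrequests, earlier) become atomic_long_t, so hot paths can test them without taking the new commit_mutex, which now serializes only real list manipulation. A sketch of the resulting split using C11 atomics (assumed names, list handling elided):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    static pthread_mutex_t commit_mutex = PTHREAD_MUTEX_INITIALIZER;
    static atomic_long     ncommit;      /* readable without the mutex */

    void add_to_commit_list(void)
    {
        pthread_mutex_lock(&commit_mutex);
        /* ... the nfs_list_add_request() equivalent goes here ... */
        atomic_fetch_add(&ncommit, 1);
        pthread_mutex_unlock(&commit_mutex);
    }

    /* lockless fast path, as in nfs_write_inode() and nfs_scan_commit() */
    bool commit_work_pending(void)
    {
        return atomic_load(&ncommit) != 0;
    }
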
@@ -896,9 +884,9 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
 void
 nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
-       spin_lock(&cinfo->inode->i_lock);
+       mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
        nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
-       spin_unlock(&cinfo->inode->i_lock);
+       mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
        if (req->wb_page)
                nfs_mark_page_unstable(req->wb_page, cinfo);
 }
@@ -922,7 +910,7 @@ nfs_request_remove_commit_list(struct nfs_page *req,
        if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
                return;
        nfs_list_remove_request(req);
-       cinfo->mds->ncommit--;
+       atomic_long_dec(&cinfo->mds->ncommit);
 }
 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
 
@@ -967,7 +955,7 @@ nfs_clear_page_commit(struct page *page)
                    WB_RECLAIMABLE);
 }
 
-/* Called holding inode (/cinfo) lock */
+/* Called holding the request lock on @req */
 static void
 nfs_clear_request_commit(struct nfs_page *req)
 {
@@ -976,9 +964,11 @@ nfs_clear_request_commit(struct nfs_page *req)
                struct nfs_commit_info cinfo;
 
                nfs_init_cinfo_from_inode(&cinfo, inode);
+               mutex_lock(&NFS_I(inode)->commit_mutex);
                if (!pnfs_clear_request_commit(req, &cinfo)) {
                        nfs_request_remove_commit_list(req, &cinfo);
                }
+               mutex_unlock(&NFS_I(inode)->commit_mutex);
                nfs_clear_page_commit(req->wb_page);
        }
 }
@@ -1023,7 +1013,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 remove_req:
                nfs_inode_remove_request(req);
 next:
-               nfs_unlock_request(req);
                nfs_end_page_writeback(req);
                nfs_release_request(req);
        }
@@ -1035,10 +1024,10 @@ out:
 unsigned long
 nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
 {
-       return cinfo->mds->ncommit;
+       return atomic_long_read(&cinfo->mds->ncommit);
 }
 
-/* cinfo->inode->i_lock held by caller */
+/* NFS_I(cinfo->inode)->commit_mutex held by caller */
 int
 nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
                     struct nfs_commit_info *cinfo, int max)
@@ -1046,20 +1035,37 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
        struct nfs_page *req, *tmp;
        int ret = 0;
 
+restart:
        list_for_each_entry_safe(req, tmp, src, wb_list) {
-               if (!nfs_lock_request(req))
-                       continue;
                kref_get(&req->wb_kref);
-               if (cond_resched_lock(&cinfo->inode->i_lock))
-                       list_safe_reset_next(req, tmp, wb_list);
+               if (!nfs_lock_request(req)) {
+                       int status;
+
+                       /* Prevent deadlock with nfs_lock_and_join_requests */
+                       if (!list_empty(dst)) {
+                               nfs_release_request(req);
+                               continue;
+                       }
+                       /* Ensure we make progress to prevent livelock */
+                       mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
+                       status = nfs_wait_on_request(req);
+                       nfs_release_request(req);
+                       mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+                       if (status < 0)
+                               break;
+                       goto restart;
+               }
                nfs_request_remove_commit_list(req, cinfo);
+               clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
                nfs_list_add_request(req, dst);
                ret++;
                if ((ret == max) && !cinfo->dreq)
                        break;
+               cond_resched();
        }
        return ret;
 }
+EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
 
 /*
  * nfs_scan_commit - Scan an inode for commit requests
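
The rewritten scan loop above encodes two constraints from its own comments: it must not sleep on a busy request while it already holds requests moved to dst (the deadlock with nfs_lock_and_join_requests()), and it must still make forward progress (no livelock). So a busy entry is skipped once something has been moved, and otherwise the scanner drops the mutex, waits for the request, and restarts. A userspace model of the control flow (hypothetical types, refcounting elided, caller holds commit_mutex):

    #include <pthread.h>
    #include <stdbool.h>

    struct entry_model {
        pthread_mutex_t lock;     /* models the nfs_page lock */
        bool            queued;   /* still on the commit list */
    };

    int scan_commit_model(pthread_mutex_t *commit_mutex,
                          struct entry_model **src, int nsrc,
                          struct entry_model **dst, int max)
    {
        int ret = 0;

    restart:
        for (int i = 0; i < nsrc; i++) {
            struct entry_model *e = src[i];

            if (!e->queued)
                continue;
            if (pthread_mutex_trylock(&e->lock) != 0) {
                if (ret > 0)
                    continue;    /* progress made: just skip the busy one */
                /* no progress yet: drop the big lock, wait, rescan */
                pthread_mutex_unlock(commit_mutex);
                pthread_mutex_lock(&e->lock);    /* nfs_wait_on_request() */
                pthread_mutex_unlock(&e->lock);
                pthread_mutex_lock(commit_mutex);
                goto restart;
            }
            e->queued = false;    /* nfs_request_remove_commit_list() */
            dst[ret++] = e;       /* stays locked, like the nfs_page */
            if (ret == max)
                break;
        }
        return ret;
    }
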
@@ -1076,15 +1082,17 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
 {
        int ret = 0;
 
-       spin_lock(&cinfo->inode->i_lock);
-       if (cinfo->mds->ncommit > 0) {
+       if (!atomic_long_read(&cinfo->mds->ncommit))
+               return 0;
+       mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
+       if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
                const int max = INT_MAX;
 
                ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
                                           cinfo, max);
                ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
        }
-       spin_unlock(&cinfo->inode->i_lock);
+       mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
        return ret;
 }
 
@@ -1105,43 +1113,21 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
        unsigned int end;
        int error;
 
-       if (!PagePrivate(page))
-               return NULL;
-
        end = offset + bytes;
-       spin_lock(&inode->i_lock);
-
-       for (;;) {
-               req = nfs_page_find_head_request_locked(NFS_I(inode), page);
-               if (req == NULL)
-                       goto out_unlock;
 
-               /* should be handled by nfs_flush_incompatible */
-               WARN_ON_ONCE(req->wb_head != req);
-               WARN_ON_ONCE(req->wb_this_page != req);
-
-               rqend = req->wb_offset + req->wb_bytes;
-               /*
-                * Tell the caller to flush out the request if
-                * the offsets are non-contiguous.
-                * Note: nfs_flush_incompatible() will already
-                * have flushed out requests having wrong owners.
-                */
-               if (offset > rqend
-                   || end < req->wb_offset)
-                       goto out_flushme;
-
-               if (nfs_lock_request(req))
-                       break;
+       req = nfs_lock_and_join_requests(page);
+       if (IS_ERR_OR_NULL(req))
+               return req;
 
-               /* The request is locked, so wait and then retry */
-               spin_unlock(&inode->i_lock);
-               error = nfs_wait_on_request(req);
-               nfs_release_request(req);
-               if (error != 0)
-                       goto out_err;
-               spin_lock(&inode->i_lock);
-       }
+       rqend = req->wb_offset + req->wb_bytes;
+       /*
+        * Tell the caller to flush out the request if
+        * the offsets are non-contiguous.
+        * Note: nfs_flush_incompatible() will already
+        * have flushed out requests having wrong owners.
+        */
+       if (offset > rqend || end < req->wb_offset)
+               goto out_flushme;
 
        /* Okay, the request matches. Update the region */
        if (offset < req->wb_offset) {
@@ -1152,17 +1138,17 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                req->wb_bytes = end - req->wb_offset;
        else
                req->wb_bytes = rqend - req->wb_offset;
-out_unlock:
-       if (req)
-               nfs_clear_request_commit(req);
-       spin_unlock(&inode->i_lock);
        return req;
 out_flushme:
-       spin_unlock(&inode->i_lock);
-       nfs_release_request(req);
+       /*
+        * Note: we mark the request dirty here because
+        * nfs_lock_and_join_requests() cannot preserve
+        * commit flags, so we have to replay the write.
+        */
+       nfs_mark_request_dirty(req);
+       nfs_unlock_and_release_request(req);
        error = nfs_wb_page(inode, page);
-out_err:
-       return ERR_PTR(error);
+       return (error < 0) ? ERR_PTR(error) : NULL;
 }
 
 /*
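
With the locking delegated to nfs_lock_and_join_requests(), what remains of nfs_try_to_update_request() is region arithmetic: a write that touches the existing request's byte range grows it, and anything discontiguous is flushed (after being marked dirty, since the join cannot preserve commit flags). The arithmetic in isolation, as a sketch:

    #include <stdbool.h>

    struct range_model { unsigned long offset; unsigned int bytes; };

    /* grow req to cover [offset, offset + bytes); false means "flush" */
    bool try_update_range(struct range_model *req,
                          unsigned long offset, unsigned int bytes)
    {
        unsigned long end   = offset + bytes;
        unsigned long rqend = req->offset + req->bytes;

        if (offset > rqend || end < req->offset)
            return false;              /* non-contiguous */
        if (offset < req->offset)
            req->offset = offset;      /* extend the front */
        if (end > rqend)
            req->bytes = end - req->offset;
        else
            req->bytes = rqend - req->offset;
        return true;
    }
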
@@ -1227,8 +1213,6 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
                l_ctx = req->wb_lock_context;
                do_flush = req->wb_page != page ||
                        !nfs_match_open_context(req->wb_context, ctx);
-               /* for now, flush if more than 1 request in page_group */
-               do_flush |= req->wb_this_page != req;
                if (l_ctx && flctx &&
                    !(list_empty_careful(&flctx->flc_posix) &&
                      list_empty_careful(&flctx->flc_flock))) {
@@ -1412,7 +1396,6 @@ static void nfs_redirty_request(struct nfs_page *req)
 {
        nfs_mark_request_dirty(req);
        set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
-       nfs_unlock_request(req);
        nfs_end_page_writeback(req);
        nfs_release_request(req);
 }
@@ -1452,7 +1435,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
                pg_ops = server->pnfs_curr_ld->pg_write_ops;
 #endif
        nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
-                       server->wsize, ioflags, GFP_NOIO);
+                       server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
@@ -1934,7 +1917,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        int ret = 0;
 
        /* no commits means nothing needs to be done */
-       if (!nfsi->commit_info.ncommit)
+       if (!atomic_long_read(&nfsi->commit_info.ncommit))
                return ret;
 
        if (wbc->sync_mode == WB_SYNC_NONE) {
@@ -2015,7 +1998,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 
        /* blocking call to cancel all requests and join to a single (head)
         * request */
-       req = nfs_lock_and_join_requests(page, false);
+       req = nfs_lock_and_join_requests(page);
 
        if (IS_ERR(req)) {
                ret = PTR_ERR(req);