fuse: fix possibly missed wake-up after abort
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 11ea2c4a38abc16a50b78a0a9668706c7d967a62..a5e516a40e7a359cdae8b2bf289175e971b6e6c9 100644
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
 
+/* Ordinary requests have even IDs, while interrupt IDs are odd */
+#define FUSE_INT_REQ_BIT (1ULL << 0)
+#define FUSE_REQ_ID_STEP (1ULL << 1)
+
 static struct kmem_cache *fuse_req_cachep;
 
 static struct fuse_dev *fuse_get_dev(struct file *file)
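
The two new constants establish the request-ID scheme used by the rest of the patch: ordinary IDs advance in steps of two, so the low bit is free to mark the interrupt belonging to a request. A minimal sketch of the relationship, with illustrative helper names that are not part of the patch:

static inline u64 example_interrupt_id(u64 req_unique)
{
	/* bit 0 is never set by fuse_get_unique(), so it can mark interrupts */
	return req_unique | FUSE_INT_REQ_BIT;
}

static inline u64 example_original_id(u64 reply_unique)
{
	/* strip the interrupt marker to recover the originating request ID */
	return reply_unique & ~FUSE_INT_REQ_BIT;
}
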
@@ -40,9 +44,6 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
                              struct fuse_page_desc *page_descs,
                              unsigned npages)
 {
-       memset(req, 0, sizeof(*req));
-       memset(pages, 0, sizeof(*pages) * npages);
-       memset(page_descs, 0, sizeof(*page_descs) * npages);
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
@@ -53,30 +54,36 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
        __set_bit(FR_PENDING, &req->flags);
 }
 
+static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
+                                         struct fuse_page_desc **desc)
+{
+       struct page **pages;
+
+       pages = kzalloc(npages * (sizeof(struct page *) +
+                                 sizeof(struct fuse_page_desc)), flags);
+       *desc = (void *) pages + npages * sizeof(struct page *);
+
+       return pages;
+}
+
 static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
 {
-       struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
+       struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
        if (req) {
-               struct page **pages;
-               struct fuse_page_desc *page_descs;
-
-               if (npages <= FUSE_REQ_INLINE_PAGES) {
+               struct page **pages = NULL;
+               struct fuse_page_desc *page_descs = NULL;
+
+               WARN_ON(npages > FUSE_MAX_MAX_PAGES);
+               if (npages > FUSE_REQ_INLINE_PAGES) {
+                       pages = fuse_req_pages_alloc(npages, flags,
+                                                    &page_descs);
+                       if (!pages) {
+                               kmem_cache_free(fuse_req_cachep, req);
+                               return NULL;
+                       }
+               } else if (npages) {
                        pages = req->inline_pages;
                        page_descs = req->inline_page_descs;
-               } else {
-                       pages = kmalloc_array(npages, sizeof(struct page *),
-                                             flags);
-                       page_descs =
-                               kmalloc_array(npages,
-                                             sizeof(struct fuse_page_desc),
-                                             flags);
-               }
-
-               if (!pages || !page_descs) {
-                       kfree(pages);
-                       kfree(page_descs);
-                       kmem_cache_free(fuse_req_cachep, req);
-                       return NULL;
                }
 
                fuse_request_init(req, pages, page_descs, npages);
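
fuse_req_pages_alloc() replaces the two separate kmalloc_array() calls with one zeroed block that holds the page-pointer array followed immediately by the page descriptors, so a single kfree() of req->pages releases both. A minimal caller sketch under that assumption; the function name is illustrative only:

static int example_attach_pages(struct fuse_req *req, unsigned int npages)
{
	struct fuse_page_desc *descs;
	struct page **pages;

	pages = fuse_req_pages_alloc(npages, GFP_KERNEL, &descs);
	if (!pages)
		return -ENOMEM;

	req->pages = pages;
	req->page_descs = descs;	/* same allocation, no separate kfree() */
	req->max_pages = npages;
	return 0;
}
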
@@ -95,12 +102,41 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
        return __fuse_request_alloc(npages, GFP_NOFS);
 }
 
-void fuse_request_free(struct fuse_req *req)
+static void fuse_req_pages_free(struct fuse_req *req)
 {
-       if (req->pages != req->inline_pages) {
+       if (req->pages != req->inline_pages)
                kfree(req->pages);
-               kfree(req->page_descs);
-       }
+}
+
+bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
+                           gfp_t flags)
+{
+       struct page **pages;
+       struct fuse_page_desc *page_descs;
+       unsigned int npages = min_t(unsigned int,
+                                   max_t(unsigned int, req->max_pages * 2,
+                                         FUSE_DEFAULT_MAX_PAGES_PER_REQ),
+                                   fc->max_pages);
+       WARN_ON(npages <= req->max_pages);
+
+       pages = fuse_req_pages_alloc(npages, flags, &page_descs);
+       if (!pages)
+               return false;
+
+       memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages);
+       memcpy(page_descs, req->page_descs,
+              sizeof(struct fuse_page_desc) * req->max_pages);
+       fuse_req_pages_free(req);
+       req->pages = pages;
+       req->page_descs = page_descs;
+       req->max_pages = npages;
+
+       return true;
+}
+
+void fuse_request_free(struct fuse_req *req)
+{
+       fuse_req_pages_free(req);
        kmem_cache_free(fuse_req_cachep, req);
 }
 
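
fuse_req_realloc_pages() grows a request's combined page array: it at least doubles max_pages (or jumps to FUSE_DEFAULT_MAX_PAGES_PER_REQ), caps the result at fc->max_pages, copies the old entries over, and frees the old block only if it was not the inline array. A minimal sketch of a caller that grows on demand; the wrapper name is illustrative only:

static bool example_ensure_capacity(struct fuse_conn *fc, struct fuse_req *req,
				    unsigned int needed)
{
	if (needed <= req->max_pages)
		return true;

	/* may still leave max_pages below 'needed' if fc->max_pages is the cap */
	return fuse_req_realloc_pages(fc, req, GFP_NOFS);
}
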
@@ -129,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 
 static void fuse_drop_waiting(struct fuse_conn *fc)
 {
-       if (fc->connected) {
-               atomic_dec(&fc->num_waiting);
-       } else if (atomic_dec_and_test(&fc->num_waiting)) {
+       /*
+        * lockless check of fc->connected is okay, because atomic_dec_and_test()
+        * provides a memory barrier matched with the one in fuse_wait_aborted()
+        * to ensure no wake-up is missed.
+        */
+       if (atomic_dec_and_test(&fc->num_waiting) &&
+           !READ_ONCE(fc->connected)) {
                /* wake up aborters */
                wake_up_all(&fc->blocked_waitq);
        }
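
Previously the dropper could observe fc->connected as still set, take the plain atomic_dec() path and skip the wake-up, even when it was performing the final decrement the aborter waits for. The fix decrements unconditionally and relies on the full barrier implied by atomic_dec_and_test(), paired with an explicit smp_mb() added to fuse_wait_aborted() further down. A minimal stand-alone sketch of that pattern, with illustrative names and assuming the usual <linux/wait.h>/<linux/atomic.h> machinery:

static atomic_t example_waiting;
static bool example_connected = true;
static DECLARE_WAIT_QUEUE_HEAD(example_waitq);

static void example_drop_waiting(void)
{
	/* atomic_dec_and_test() implies a full barrier before the load below */
	if (atomic_dec_and_test(&example_waiting) &&
	    !READ_ONCE(example_connected))
		wake_up_all(&example_waitq);
}

static void example_abort_and_wait(void)
{
	WRITE_ONCE(example_connected, false);
	smp_mb();	/* pairs with the barrier in example_drop_waiting() */
	wait_event(example_waitq, atomic_read(&example_waiting) == 0);
}
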
@@ -235,8 +275,10 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
        struct file *file = req->stolen_file;
        struct fuse_file *ff = file->private_data;
 
+       WARN_ON(req->max_pages);
        spin_lock(&fc->lock);
-       fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
+       memset(req, 0, sizeof(*req));
+       fuse_request_init(req, NULL, NULL, 0);
        BUG_ON(ff->reserved_req);
        ff->reserved_req = req;
        wake_up_all(&fc->reserved_req_waitq);
@@ -287,10 +329,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
                         * We get here in the unlikely case that a background
                         * request was allocated but not sent
                         */
-                       spin_lock(&fc->lock);
+                       spin_lock(&fc->bg_lock);
                        if (!fc->blocked)
                                wake_up(&fc->blocked_waitq);
-                       spin_unlock(&fc->lock);
+                       spin_unlock(&fc->bg_lock);
                }
 
                if (test_bit(FR_WAITING, &req->flags)) {
@@ -319,7 +361,13 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
 
 static u64 fuse_get_unique(struct fuse_iqueue *fiq)
 {
-       return ++fiq->reqctr;
+       fiq->reqctr += FUSE_REQ_ID_STEP;
+       return fiq->reqctr;
+}
+
+static unsigned int fuse_req_hash(u64 unique)
+{
+       return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
 }
 
 static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
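
With the ID scheme above, fuse_get_unique() hands out only even IDs and fuse_req_hash() strips FUSE_INT_REQ_BIT before hashing, so a request and an interrupt reply carrying its odd twin ID select the same processing bucket. A minimal sketch, with an illustrative function name:

static bool example_same_bucket(u64 req_unique)
{
	u64 intr_unique = req_unique | FUSE_INT_REQ_BIT;

	/* true for every ID produced by fuse_get_unique() */
	return fuse_req_hash(req_unique) == fuse_req_hash(intr_unique);
}
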
@@ -353,12 +401,13 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 
 static void flush_bg_queue(struct fuse_conn *fc)
 {
+       struct fuse_iqueue *fiq = &fc->iq;
+
        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;
-               struct fuse_iqueue *fiq = &fc->iq;
 
-               req = list_entry(fc->bg_queue.next, struct fuse_req, list);
+               req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
                spin_lock(&fiq->waitq.lock);
@@ -389,14 +438,21 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
        WARN_ON(test_bit(FR_PENDING, &req->flags));
        WARN_ON(test_bit(FR_SENT, &req->flags));
        if (test_bit(FR_BACKGROUND, &req->flags)) {
-               spin_lock(&fc->lock);
+               spin_lock(&fc->bg_lock);
                clear_bit(FR_BACKGROUND, &req->flags);
-               if (fc->num_background == fc->max_background)
+               if (fc->num_background == fc->max_background) {
                        fc->blocked = 0;
-
-               /* Wake up next waiter, if any */
-               if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
                        wake_up(&fc->blocked_waitq);
+               } else if (!fc->blocked) {
+                       /*
+                        * Wake up next waiter, if any.  It's okay to use
+                        * waitqueue_active(), as we've already synced up
+                        * fc->blocked with waiters with the wake_up() call
+                        * above.
+                        */
+                       if (waitqueue_active(&fc->blocked_waitq))
+                               wake_up(&fc->blocked_waitq);
+               }
 
                if (fc->num_background == fc->congestion_threshold && fc->sb) {
                        clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
@@ -405,7 +461,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
                fc->num_background--;
                fc->active_background--;
                flush_bg_queue(fc);
-               spin_unlock(&fc->lock);
+               spin_unlock(&fc->bg_lock);
        }
        wake_up(&req->waitq);
        if (req->end)
@@ -573,40 +629,38 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
        return ret;
 }
 
-/*
- * Called under fc->lock
- *
- * fc->connected must have been checked previously
- */
-void fuse_request_send_background_locked(struct fuse_conn *fc,
-                                        struct fuse_req *req)
+bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
 {
-       BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
+       bool queued = false;
+
+       WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
        if (!test_bit(FR_WAITING, &req->flags)) {
                __set_bit(FR_WAITING, &req->flags);
                atomic_inc(&fc->num_waiting);
        }
        __set_bit(FR_ISREPLY, &req->flags);
-       fc->num_background++;
-       if (fc->num_background == fc->max_background)
-               fc->blocked = 1;
-       if (fc->num_background == fc->congestion_threshold && fc->sb) {
-               set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
-               set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+       spin_lock(&fc->bg_lock);
+       if (likely(fc->connected)) {
+               fc->num_background++;
+               if (fc->num_background == fc->max_background)
+                       fc->blocked = 1;
+               if (fc->num_background == fc->congestion_threshold && fc->sb) {
+                       set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
+                       set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
+               }
+               list_add_tail(&req->list, &fc->bg_queue);
+               flush_bg_queue(fc);
+               queued = true;
        }
-       list_add_tail(&req->list, &fc->bg_queue);
-       flush_bg_queue(fc);
+       spin_unlock(&fc->bg_lock);
+
+       return queued;
 }
 
 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
 {
-       BUG_ON(!req->end);
-       spin_lock(&fc->lock);
-       if (fc->connected) {
-               fuse_request_send_background_locked(fc, req);
-               spin_unlock(&fc->lock);
-       } else {
-               spin_unlock(&fc->lock);
+       WARN_ON(!req->end);
+       if (!fuse_request_queue_background(fc, req)) {
                req->out.h.error = -ENOTCONN;
                req->end(fc, req);
                fuse_put_request(fc, req);
@@ -1084,12 +1138,11 @@ __releases(fiq->waitq.lock)
        int err;
 
        list_del_init(&req->intr_entry);
-       req->intr_unique = fuse_get_unique(fiq);
        memset(&ih, 0, sizeof(ih));
        memset(&arg, 0, sizeof(arg));
        ih.len = reqsize;
        ih.opcode = FUSE_INTERRUPT;
-       ih.unique = req->intr_unique;
+       ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
        arg.unique = req->in.h.unique;
 
        spin_unlock(&fiq->waitq.lock);
@@ -1238,6 +1291,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
        struct fuse_req *req;
        struct fuse_in *in;
        unsigned reqsize;
+       unsigned int hash;
 
  restart:
        spin_lock(&fiq->waitq.lock);
@@ -1310,13 +1364,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
                err = reqsize;
                goto out_end;
        }
-       list_move_tail(&req->list, &fpq->processing);
-       spin_unlock(&fpq->lock);
+       hash = fuse_req_hash(req->in.h.unique);
+       list_move_tail(&req->list, &fpq->processing[hash]);
+       __fuse_get_request(req);
        set_bit(FR_SENT, &req->flags);
+       spin_unlock(&fpq->lock);
        /* matches barrier in request_wait_answer() */
        smp_mb__after_atomic();
        if (test_bit(FR_INTERRUPTED, &req->flags))
                queue_interrupt(fiq, req);
+       fuse_put_request(fc, req);
 
        return reqsize;
 
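
Taking an extra reference with __fuse_get_request() while fpq->lock is still held lets the read path drop the lock before queue_interrupt() without risking that a concurrent reply frees the request underneath it. A minimal sketch of that get/use/put pattern, with an illustrative function name:

static void example_use_req_after_unlock(struct fuse_conn *fc,
					 struct fuse_pqueue *fpq,
					 struct fuse_req *req)
{
	spin_lock(&fpq->lock);
	__fuse_get_request(req);	/* keep req alive past the unlock */
	spin_unlock(&fpq->lock);

	/* req may complete concurrently, but cannot be freed until the put */
	if (test_bit(FR_INTERRUPTED, &req->flags))
		queue_interrupt(&fc->iq, req);

	fuse_put_request(fc, req);	/* drops the extra reference */
}
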
@@ -1663,7 +1720,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        unsigned int num;
        unsigned int offset;
        size_t total_len = 0;
-       int num_pages;
+       unsigned int num_pages;
 
        offset = outarg->offset & ~PAGE_MASK;
        file_size = i_size_read(inode);
@@ -1675,7 +1732,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
                num = file_size - outarg->offset;
 
        num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+       num_pages = min(num_pages, fc->max_pages);
 
        req = fuse_get_req(fc, num_pages);
        if (IS_ERR(req))
@@ -1715,8 +1772,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        req->in.args[1].size = total_len;
 
        err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-       if (err)
+       if (err) {
                fuse_retrieve_end(fc, req);
+               fuse_put_request(fc, req);
+       }
 
        return err;
 }
@@ -1792,10 +1851,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 /* Look up request on processing list by unique ID */
 static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
 {
+       unsigned int hash = fuse_req_hash(unique);
        struct fuse_req *req;
 
-       list_for_each_entry(req, &fpq->processing, list) {
-               if (req->in.h.unique == unique || req->intr_unique == unique)
+       list_for_each_entry(req, &fpq->processing[hash], list) {
+               if (req->in.h.unique == unique)
                        return req;
        }
        return NULL;
@@ -1869,22 +1929,26 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
        if (!fpq->connected)
                goto err_unlock_pq;
 
-       req = request_find(fpq, oh.unique);
+       req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
        if (!req)
                goto err_unlock_pq;
 
-       /* Is it an interrupt reply? */
-       if (req->intr_unique == oh.unique) {
+       /* Is it an interrupt reply ID? */
+       if (oh.unique & FUSE_INT_REQ_BIT) {
+               __fuse_get_request(req);
                spin_unlock(&fpq->lock);
 
                err = -EINVAL;
-               if (nbytes != sizeof(struct fuse_out_header))
+               if (nbytes != sizeof(struct fuse_out_header)) {
+                       fuse_put_request(fc, req);
                        goto err_finish;
+               }
 
                if (oh.error == -ENOSYS)
                        fc->no_interrupt = 1;
                else if (oh.error == -EAGAIN)
                        queue_interrupt(&fc->iq, req);
+               fuse_put_request(fc, req);
 
                fuse_copy_finish(cs);
                return nbytes;
@@ -2102,9 +2166,13 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
                struct fuse_dev *fud;
                struct fuse_req *req, *next;
                LIST_HEAD(to_end);
+               unsigned int i;
 
+               /* Background queuing checks fc->connected under bg_lock */
+               spin_lock(&fc->bg_lock);
                fc->connected = 0;
-               fc->blocked = 0;
+               spin_unlock(&fc->bg_lock);
+
                fc->aborted = is_abort;
                fuse_set_initialized(fc);
                list_for_each_entry(fud, &fc->devices, entry) {
@@ -2123,11 +2191,16 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
                                }
                                spin_unlock(&req->waitq.lock);
                        }
-                       list_splice_tail_init(&fpq->processing, &to_end);
+                       for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+                               list_splice_tail_init(&fpq->processing[i],
+                                                     &to_end);
                        spin_unlock(&fpq->lock);
                }
+               spin_lock(&fc->bg_lock);
+               fc->blocked = 0;
                fc->max_background = UINT_MAX;
                flush_bg_queue(fc);
+               spin_unlock(&fc->bg_lock);
 
                spin_lock(&fiq->waitq.lock);
                fiq->connected = 0;
@@ -2152,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
 void fuse_wait_aborted(struct fuse_conn *fc)
 {
+       /* matches implicit memory barrier in fuse_drop_waiting() */
+       smp_mb();
        wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
 }
 
@@ -2163,10 +2238,12 @@ int fuse_dev_release(struct inode *inode, struct file *file)
                struct fuse_conn *fc = fud->fc;
                struct fuse_pqueue *fpq = &fud->pq;
                LIST_HEAD(to_end);
+               unsigned int i;
 
                spin_lock(&fpq->lock);
                WARN_ON(!list_empty(&fpq->io));
-               list_splice_init(&fpq->processing, &to_end);
+               for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+                       list_splice_init(&fpq->processing[i], &to_end);
                spin_unlock(&fpq->lock);
 
                end_requests(fc, &to_end);