Merge remote-tracking branch 'linus/master' into testing
authorSage Weil <sage@inktank.com>
Thu, 15 Aug 2013 18:11:45 +0000 (11:11 -0700)
committerSage Weil <sage@inktank.com>
Thu, 15 Aug 2013 18:11:45 +0000 (11:11 -0700)
drivers/block/rbd.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/mds_client.c
fs/ceph/super.h
net/ceph/messenger.c
net/ceph/osd_client.c

index 4ad2ad9a5bb01448d6d2206a3387575718c47b87..0d669ae80d61d4e2a2b33733054bf13b047f22f6 100644 (file)
@@ -2163,9 +2163,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
        struct rbd_obj_request *obj_request = NULL;
        struct rbd_obj_request *next_obj_request;
        bool write_request = img_request_write_test(img_request);
-       struct bio *bio_list = 0;
+       struct bio *bio_list = NULL;
        unsigned int bio_offset = 0;
-       struct page **pages = 0;
+       struct page **pages = NULL;
        u64 img_offset;
        u64 resid;
        u16 opcode;
index 5318a3b704f6d6f908520a9c1fc18b4dadc9a509..722585cd5c7ea62af78dc8bf6e0d83d587488b5a 100644 (file)
@@ -150,10 +150,6 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
        struct ceph_inode_info *ci;
        struct ceph_snap_context *snapc = page_snap_context(page);
 
-       BUG_ON(!PageLocked(page));
-       BUG_ON(!PagePrivate(page));
-       BUG_ON(!page->mapping);
-
        inode = page->mapping->host;
 
        /*
index 25442b40c25a71761596e071612140f01279fb69..430121a795bdcb8e463ba2a03f7e7b1903bbdd14 100644 (file)
@@ -2333,6 +2333,38 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                iput(inode);
 }
 
+/*
+ * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
+ */
+static void invalidate_aliases(struct inode *inode)
+{
+       struct dentry *dn, *prev = NULL;
+
+       dout("invalidate_aliases inode %p\n", inode);
+       d_prune_aliases(inode);
+       /*
+        * For a non-directory inode, d_find_alias() only returns
+        * a connected dentry. After calling d_delete(), the dentry
+        * becomes disconnected.
+        *
+        * For a directory inode, d_find_alias() can only return
+        * a disconnected dentry. But a directory inode should have
+        * at most one alias.
+        */
+       while ((dn = d_find_alias(inode))) {
+               if (dn == prev) {
+                       dput(dn);
+                       break;
+               }
+               d_delete(dn);
+               if (prev)
+                       dput(prev);
+               prev = dn;
+       }
+       if (prev)
+               dput(prev);
+}
+
 /*
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
@@ -2363,6 +2395,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
        int writeback = 0;
        int revoked_rdcache = 0;
        int queue_invalidate = 0;
+       int deleted_inode = 0;
 
        dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
             inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2407,8 +2440,12 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
                     from_kgid(&init_user_ns, inode->i_gid));
        }
 
-       if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+       if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
                set_nlink(inode, le32_to_cpu(grant->nlink));
+               if (inode->i_nlink == 0 &&
+                   (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+                       deleted_inode = 1;
+       }
 
        if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
                int len = le32_to_cpu(grant->xattr_len);
@@ -2517,6 +2554,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
                ceph_queue_writeback(inode);
        if (queue_invalidate)
                ceph_queue_invalidate(inode);
+       if (deleted_inode)
+               invalidate_aliases(inode);
        if (wake)
                wake_up_all(&ci->i_cap_wq);
 
index a40ceda47a3218ee53c2167d8844899c5de3e9cf..868b61d56cac77f3a8328d5ba4851ec7947fe827 100644 (file)
@@ -793,6 +793,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        req->r_locked_dir = dir;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+       /* release LINK_SHARED on source inode (mds will lock it) */
+       req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (err) {
                d_drop(dentry);
index 2ddf061c1c4af730885365b07dcb9388d7af98f9..bc0735498d293e9c45a0bada42a9443e0f36e382 100644 (file)
@@ -313,9 +313,9 @@ static int striped_read(struct inode *inode,
 {
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       u64 pos, this_len;
+       u64 pos, this_len, left;
        int io_align, page_align;
-       int left, pages_left;
+       int pages_left;
        int read;
        struct page **page_pos;
        int ret;
@@ -346,7 +346,7 @@ more:
                ret = 0;
        hit_stripe = this_len < left;
        was_short = ret >= 0 && ret < this_len;
-       dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
+       dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
             ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
 
        if (ret > 0) {
@@ -378,7 +378,7 @@ more:
                        if (pos + left > inode->i_size)
                                left = inode->i_size - pos;
 
-                       dout("zero tail %d\n", left);
+                       dout("zero tail %llu\n", left);
                        ceph_zero_page_vector_range(page_align + read, left,
                                                    pages);
                        read += left;
@@ -659,7 +659,6 @@ again:
 
        if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (iocb->ki_filp->f_flags & O_DIRECT) ||
-           (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
            (fi->flags & CEPH_F_SYNC))
                /* hmm, this isn't really async... */
                ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
@@ -711,13 +710,11 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
                &ceph_sb_to_client(inode->i_sb)->client->osdc;
        ssize_t count, written = 0;
        int err, want, got;
-       bool hold_mutex;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
        mutex_lock(&inode->i_mutex);
-       hold_mutex = true;
 
        err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
        if (err)
@@ -763,18 +760,24 @@ retry_snap:
 
        if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (iocb->ki_filp->f_flags & O_DIRECT) ||
-           (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
            (fi->flags & CEPH_F_SYNC)) {
                mutex_unlock(&inode->i_mutex);
                written = ceph_sync_write(file, iov->iov_base, count,
                                          pos, &iocb->ki_pos);
+               if (written == -EOLDSNAPC) {
+                       dout("aio_write %p %llx.%llx %llu~%u"
+                               "got EOLDSNAPC, retrying\n",
+                               inode, ceph_vinop(inode),
+                               pos, (unsigned)iov->iov_len);
+                       mutex_lock(&inode->i_mutex);
+                       goto retry_snap;
+               }
        } else {
                written = generic_file_buffered_write(iocb, iov, nr_segs,
                                                      pos, &iocb->ki_pos,
                                                      count, 0);
                mutex_unlock(&inode->i_mutex);
        }
-       hold_mutex = false;
 
        if (written >= 0) {
                int dirty;
@@ -798,18 +801,12 @@ retry_snap:
                        written = err;
        }
 
-       if (written == -EOLDSNAPC) {
-               dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
-                    inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
-               mutex_lock(&inode->i_mutex);
-               hold_mutex = true;
-               goto retry_snap;
-       }
+       goto out_unlocked;
+
 out:
-       if (hold_mutex)
-               mutex_unlock(&inode->i_mutex);
+       mutex_unlock(&inode->i_mutex);
+out_unlocked:
        current->backing_dev_info = NULL;
-
        return written ? written : err;
 }
 
index f3a2abf28a77df362faf5c38dc471a64dcbfdffc..98b6e50bde04dfe6fc60cf381d068055b2a07db3 100644 (file)
@@ -61,6 +61,14 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
        return inode;
 }
 
+struct inode *ceph_lookup_inode(struct super_block *sb, struct ceph_vino vino)
+{
+       struct inode *inode;
+       ino_t t = ceph_vino_to_ino(vino);
+       inode = ilookup5_nowait(sb, t, ceph_ino_compare, &vino);
+       return inode;
+}
+
 /*
  * get/constuct snapdir inode for a given directory
  */
@@ -1465,7 +1473,14 @@ static void ceph_vmtruncate_work(struct work_struct *work)
        struct inode *inode = &ci->vfs_inode;
 
        dout("vmtruncate_work %p\n", inode);
-       mutex_lock(&inode->i_mutex);
+       if (!mutex_trylock(&inode->i_mutex)) {
+               /*
+                * the i_mutex can be held by a writer who is waiting for
+                * caps. wake up waiters, they will do pending vmtruncate.
+                */
+               wake_up_all(&ci->i_cap_wq);
+               mutex_lock(&inode->i_mutex);
+       }
        __ceph_do_pending_vmtruncate(inode);
        mutex_unlock(&inode->i_mutex);
        iput(inode);
index e0b4ef31d3c870c9e73fecad303e9f9957542385..669622fd1ae3d52af418cc4c283a5f22513bca73 100644 (file)
@@ -196,8 +196,10 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
        r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
                                          &dl.object_no, &dl.object_offset,
                                          &olen);
-       if (r < 0)
+       if (r < 0) {
+               up_read(&osdc->map_sem);
                return -EIO;
+       }
        dl.file_offset -= dl.object_offset;
        dl.object_size = ceph_file_layout_object_size(ci->i_layout);
        dl.block_size = ceph_file_layout_su(ci->i_layout);
@@ -209,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
        snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
                 ceph_ino(inode), dl.object_no);
 
-       ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
-               ceph_file_layout_pg_pool(ci->i_layout));
+       r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+                               ceph_file_layout_pg_pool(ci->i_layout));
+       if (r < 0) {
+               up_read(&osdc->map_sem);
+               return r;
+       }
 
        dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
        if (dl.osd >= 0) {
index 187bf214444da8c8fc9c6a8603b699a258f773f8..603786b564bed08e2591761ae79aa5ef0c926849 100644 (file)
@@ -414,6 +414,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 {
        struct ceph_mds_session *s;
 
+       if (mds >= mdsc->mdsmap->m_max_mds)
+               return ERR_PTR(-EINVAL);
+
        s = kzalloc(sizeof(*s), GFP_NOFS);
        if (!s)
                return ERR_PTR(-ENOMEM);
@@ -1028,6 +1031,37 @@ static void remove_session_caps(struct ceph_mds_session *session)
 {
        dout("remove_session_caps on %p\n", session);
        iterate_session_caps(session, remove_session_caps_cb, NULL);
+
+       spin_lock(&session->s_cap_lock);
+       if (session->s_nr_caps > 0) {
+               struct super_block *sb = session->s_mdsc->fsc->sb;
+               struct inode *inode;
+               struct ceph_cap *cap, *prev = NULL;
+               struct ceph_vino vino;
+               /*
+                * iterate_session_caps() skips inodes that are being
+                * deleted, so we need to wait until deletions are complete.
+                * __wait_on_freeing_inode() is designed for the job,
+                * but it is not exported, so use lookup inode function
+                * to access it.
+                */
+               while (!list_empty(&session->s_caps)) {
+                       cap = list_entry(session->s_caps.next,
+                                        struct ceph_cap, session_caps);
+                       if (cap == prev)
+                               break;
+                       prev = cap;
+                       vino = cap->ci->i_vino;
+                       spin_unlock(&session->s_cap_lock);
+
+                       inode = ceph_lookup_inode(sb, vino);
+                       iput(inode);
+
+                       spin_lock(&session->s_cap_lock);
+               }
+       }
+       spin_unlock(&session->s_cap_lock);
+
        BUG_ON(session->s_nr_caps > 0);
        BUG_ON(!list_empty(&session->s_cap_flushing));
        cleanup_cap_releases(session);
index cbded572345e77a107e539aa4e433d6f6f7964c0..afcd62a68916e358676d4fa4e7b7abf3c94ecb43 100644 (file)
@@ -677,6 +677,8 @@ extern void ceph_destroy_inode(struct inode *inode);
 
 extern struct inode *ceph_get_inode(struct super_block *sb,
                                    struct ceph_vino vino);
+extern struct inode *ceph_lookup_inode(struct super_block *sb,
+                                      struct ceph_vino vino);
 extern struct inode *ceph_get_snapdir(struct inode *parent);
 extern int ceph_fill_file_size(struct inode *inode, int issued,
                               u32 truncate_seq, u64 truncate_size, u64 size);
index eb0a46a49bd42351d23878f118384ab09a8d2ee8..dd9b5857ef5cdad15dd50f69455d2b5929f3f8c8 100644 (file)
@@ -290,7 +290,7 @@ int ceph_msgr_init(void)
        if (ceph_msgr_slab_init())
                return -ENOMEM;
 
-       ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
+       ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
        if (ceph_msgr_wq)
                return 0;
 
index dd47889adc4aec94941d6f17105878ebe235db8f..dbc0a7392d67b67b276e985453aa5e6d5ae8ee9f 100644 (file)
@@ -2129,6 +2129,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
                        dout("osdc_start_request failed map, "
                                " will retry %lld\n", req->r_tid);
                        rc = 0;
+               } else {
+                       __unregister_request(osdc, req);
                }
                goto out_unlock;
        }