r/o bind mounts: create cleanup helper svc_msnfs()
[sfrench/cifs-2.6.git] / fs / ext4 / inode.c
index b34182b6ee4d1ea9cba530ea5dac87a787a8b7c1..0df2b1e06d0b75be46a211b24085ea6485c7b7a6 100644 (file)
@@ -255,8 +255,8 @@ static int verify_chain(Indirect *from, Indirect *to)
  *     @inode: inode in question (we are only interested in its superblock)
  *     @i_block: block number to be parsed
  *     @offsets: array to store the offsets in
- *      @boundary: set this non-zero if the referred-to block is likely to be
- *             followed (on disk) by an indirect block.
+ *     @boundary: set this non-zero if the referred-to block is likely to be
+ *            followed (on disk) by an indirect block.
  *
  *     To store the locations of file's data ext4 uses a data structure common
  *     for UNIX filesystems - tree of pointers anchored in the inode, with
@@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
        /* We are done with atomic stuff, now do the rest of housekeeping */
 
-       inode->i_ctime = CURRENT_TIME_SEC;
+       inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 
        /* had we spliced it onto indirect block? */
@@ -1146,34 +1146,50 @@ static int do_journal_get_write_access(handle_t *handle,
        return ext4_journal_get_write_access(handle, bh);
 }
 
-static int ext4_prepare_write(struct file *file, struct page *page,
-                             unsigned from, unsigned to)
+static int ext4_write_begin(struct file *file, struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned flags,
+                               struct page **pagep, void **fsdata)
 {
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = mapping->host;
        int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
        handle_t *handle;
        int retries = 0;
+       struct page *page;
+       pgoff_t index;
+       unsigned from, to;
+
+       index = pos >> PAGE_CACHE_SHIFT;
+       from = pos & (PAGE_CACHE_SIZE - 1);
+       to = from + len;
 
 retry:
-       handle = ext4_journal_start(inode, needed_blocks);
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               goto out;
+       page = __grab_cache_page(mapping, index);
+       if (!page)
+               return -ENOMEM;
+       *pagep = page;
+
+       handle = ext4_journal_start(inode, needed_blocks);
+       if (IS_ERR(handle)) {
+               unlock_page(page);
+               page_cache_release(page);
+               ret = PTR_ERR(handle);
+               goto out;
        }
-       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_prepare_write(page, from, to, ext4_get_block);
-       else
-               ret = block_prepare_write(page, from, to, ext4_get_block);
-       if (ret)
-               goto prepare_write_failed;
 
-       if (ext4_should_journal_data(inode)) {
+       ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                                                       ext4_get_block);
+
+       if (!ret && ext4_should_journal_data(inode)) {
                ret = walk_page_buffers(handle, page_buffers(page),
                                from, to, NULL, do_journal_get_write_access);
        }
-prepare_write_failed:
-       if (ret)
+
+       if (ret) {
                ext4_journal_stop(handle);
+               unlock_page(page);
+               page_cache_release(page);
+       }
+
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
 out:
@@ -1185,12 +1201,12 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
        int err = jbd2_journal_dirty_data(handle, bh);
        if (err)
                ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
-                                               bh, handle,err);
+                                               bh, handle, err);
        return err;
 }
 
-/* For commit_write() in data=journal mode */
-static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
+/* For write_end() in data=journal mode */
+static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
        if (!buffer_mapped(bh) || buffer_freed(bh))
                return 0;
@@ -1198,6 +1214,29 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
        return ext4_journal_dirty_metadata(handle, bh);
 }
 
+/*
+ * Generic write_end handler for ordered and writeback ext4 journal modes.
+ * We can't use generic_write_end, because that unlocks the page and we need to
+ * unlock the page after ext4_journal_stop, but ext4_journal_stop must run
+ * after block_write_end.
+ */
+static int ext4_generic_write_end(struct file *file,
+                               struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata)
+{
+       struct inode *inode = file->f_mapping->host;
+
+       copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+       if (pos+copied > inode->i_size) {
+               i_size_write(inode, pos+copied);
+               mark_inode_dirty(inode);
+       }
+
+       return copied;
+}
+
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
@@ -1205,78 +1244,101 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
  * ext4 never places buffers on inode->i_mapping->private_list.  metadata
  * buffers are managed internally.
  */
-static int ext4_ordered_commit_write(struct file *file, struct page *page,
-                            unsigned from, unsigned to)
+static int ext4_ordered_write_end(struct file *file,
+                               struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata)
 {
        handle_t *handle = ext4_journal_current_handle();
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = file->f_mapping->host;
+       unsigned from, to;
        int ret = 0, ret2;
 
+       from = pos & (PAGE_CACHE_SIZE - 1);
+       to = from + len;
+
        ret = walk_page_buffers(handle, page_buffers(page),
                from, to, NULL, ext4_journal_dirty_data);
 
        if (ret == 0) {
                /*
-                * generic_commit_write() will run mark_inode_dirty() if i_size
+                * generic_write_end() will run mark_inode_dirty() if i_size
                 * changes.  So let's piggyback the i_disksize mark_inode_dirty
                 * into that.
                 */
                loff_t new_i_size;
 
-               new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+               new_i_size = pos + copied;
                if (new_i_size > EXT4_I(inode)->i_disksize)
                        EXT4_I(inode)->i_disksize = new_i_size;
-               ret = generic_commit_write(file, page, from, to);
+               copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+                                                       page, fsdata);
+               if (copied < 0)
+                       ret = copied;
        }
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
-       return ret;
+       unlock_page(page);
+       page_cache_release(page);
+
+       return ret ? ret : copied;
 }
 
-static int ext4_writeback_commit_write(struct file *file, struct page *page,
-                            unsigned from, unsigned to)
+static int ext4_writeback_write_end(struct file *file,
+                               struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata)
 {
        handle_t *handle = ext4_journal_current_handle();
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = file->f_mapping->host;
        int ret = 0, ret2;
        loff_t new_i_size;
 
-       new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+       new_i_size = pos + copied;
        if (new_i_size > EXT4_I(inode)->i_disksize)
                EXT4_I(inode)->i_disksize = new_i_size;
 
-       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_commit_write(file, page, from, to);
-       else
-               ret = generic_commit_write(file, page, from, to);
+       copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+                                                       page, fsdata);
+       if (copied < 0)
+               ret = copied;
 
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
-       return ret;
+       unlock_page(page);
+       page_cache_release(page);
+
+       return ret ? ret : copied;
 }
 
-static int ext4_journalled_commit_write(struct file *file,
-                       struct page *page, unsigned from, unsigned to)
+static int ext4_journalled_write_end(struct file *file,
+                               struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata)
 {
        handle_t *handle = ext4_journal_current_handle();
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = mapping->host;
        int ret = 0, ret2;
        int partial = 0;
-       loff_t pos;
+       unsigned from, to;
 
-       /*
-        * Here we duplicate the generic_commit_write() functionality
-        */
-       pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+       from = pos & (PAGE_CACHE_SIZE - 1);
+       to = from + len;
+
+       if (copied < len) {
+               if (!PageUptodate(page))
+                       copied = 0;
+               page_zero_new_buffers(page, from+copied, to);
+       }
 
        ret = walk_page_buffers(handle, page_buffers(page), from,
-                               to, &partial, commit_write_fn);
+                               to, &partial, write_end_fn);
        if (!partial)
                SetPageUptodate(page);
-       if (pos > inode->i_size)
-               i_size_write(inode, pos);
+       if (pos+copied > inode->i_size)
+               i_size_write(inode, pos+copied);
        EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
        if (inode->i_size > EXT4_I(inode)->i_disksize) {
                EXT4_I(inode)->i_disksize = inode->i_size;
@@ -1284,10 +1346,14 @@ static int ext4_journalled_commit_write(struct file *file,
                if (!ret)
                        ret = ret2;
        }
+
        ret2 = ext4_journal_stop(handle);
        if (!ret)
                ret = ret2;
-       return ret;
+       unlock_page(page);
+       page_cache_release(page);
+
+       return ret ? ret : copied;
 }
 
 /*
@@ -1545,7 +1611,7 @@ static int ext4_journalled_writepage(struct page *page,
                        PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
 
                err = walk_page_buffers(handle, page_buffers(page), 0,
-                               PAGE_CACHE_SIZE, NULL, commit_write_fn);
+                               PAGE_CACHE_SIZE, NULL, write_end_fn);
                if (ret == 0)
                        ret = err;
                EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
@@ -1705,8 +1771,8 @@ static const struct address_space_operations ext4_ordered_aops = {
        .readpages      = ext4_readpages,
        .writepage      = ext4_ordered_writepage,
        .sync_page      = block_sync_page,
-       .prepare_write  = ext4_prepare_write,
-       .commit_write   = ext4_ordered_commit_write,
+       .write_begin    = ext4_write_begin,
+       .write_end      = ext4_ordered_write_end,
        .bmap           = ext4_bmap,
        .invalidatepage = ext4_invalidatepage,
        .releasepage    = ext4_releasepage,
@@ -1719,8 +1785,8 @@ static const struct address_space_operations ext4_writeback_aops = {
        .readpages      = ext4_readpages,
        .writepage      = ext4_writeback_writepage,
        .sync_page      = block_sync_page,
-       .prepare_write  = ext4_prepare_write,
-       .commit_write   = ext4_writeback_commit_write,
+       .write_begin    = ext4_write_begin,
+       .write_end      = ext4_writeback_write_end,
        .bmap           = ext4_bmap,
        .invalidatepage = ext4_invalidatepage,
        .releasepage    = ext4_releasepage,
@@ -1733,8 +1799,8 @@ static const struct address_space_operations ext4_journalled_aops = {
        .readpages      = ext4_readpages,
        .writepage      = ext4_journalled_writepage,
        .sync_page      = block_sync_page,
-       .prepare_write  = ext4_prepare_write,
-       .commit_write   = ext4_journalled_commit_write,
+       .write_begin    = ext4_write_begin,
+       .write_end      = ext4_journalled_write_end,
        .set_page_dirty = ext4_journalled_set_page_dirty,
        .bmap           = ext4_bmap,
        .invalidatepage = ext4_invalidatepage,
@@ -1766,7 +1832,6 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
        struct inode *inode = mapping->host;
        struct buffer_head *bh;
        int err = 0;
-       void *kaddr;
 
        blocksize = inode->i_sb->s_blocksize;
        length = blocksize - (offset & (blocksize - 1));
@@ -1778,10 +1843,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
         */
        if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
             ext4_should_writeback_data(inode) && PageUptodate(page)) {
-               kaddr = kmap_atomic(page, KM_USER0);
-               memset(kaddr + offset, 0, length);
-               flush_dcache_page(page);
-               kunmap_atomic(kaddr, KM_USER0);
+               zero_user_page(page, offset, length, KM_USER0);
                set_page_dirty(page);
                goto unlock;
        }
@@ -1834,10 +1896,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
                        goto unlock;
        }
 
-       kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr + offset, 0, length);
-       flush_dcache_page(page);
-       kunmap_atomic(kaddr, KM_USER0);
+       zero_user_page(page, offset, length, KM_USER0);
 
        BUFFER_TRACE(bh, "zeroed end of block");
 
@@ -2375,7 +2434,7 @@ do_indirects:
        ext4_discard_reservation(inode);
 
        mutex_unlock(&ei->truncate_mutex);
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
 
        /*
@@ -2583,6 +2642,25 @@ void ext4_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_DIRSYNC;
 }
 
+/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
+void ext4_get_inode_flags(struct ext4_inode_info *ei)
+{
+       unsigned int flags = ei->vfs_inode.i_flags;
+
+       ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+                       EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
+       if (flags & S_SYNC)
+               ei->i_flags |= EXT4_SYNC_FL;
+       if (flags & S_APPEND)
+               ei->i_flags |= EXT4_APPEND_FL;
+       if (flags & S_IMMUTABLE)
+               ei->i_flags |= EXT4_IMMUTABLE_FL;
+       if (flags & S_NOATIME)
+               ei->i_flags |= EXT4_NOATIME_FL;
+       if (flags & S_DIRSYNC)
+               ei->i_flags |= EXT4_DIRSYNC_FL;
+}
+
 void ext4_read_inode(struct inode * inode)
 {
        struct ext4_iloc iloc;
@@ -2610,10 +2688,6 @@ void ext4_read_inode(struct inode * inode)
        }
        inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
        inode->i_size = le32_to_cpu(raw_inode->i_size);
-       inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
-       inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
-       inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
-       inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
        ei->i_state = 0;
        ei->i_dir_start_lookup = 0;
@@ -2673,8 +2747,10 @@ void ext4_read_inode(struct inode * inode)
                 */
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
-                   EXT4_INODE_SIZE(inode->i_sb))
+                   EXT4_INODE_SIZE(inode->i_sb)) {
+                       brelse (bh);
                        goto bad_inode;
+               }
                if (ei->i_extra_isize == 0) {
                        /* The extra space is currently unused. Use it. */
                        ei->i_extra_isize = sizeof(struct ext4_inode) -
@@ -2689,6 +2765,11 @@ void ext4_read_inode(struct inode * inode)
        } else
                ei->i_extra_isize = 0;
 
+       EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+       EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
+       EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
+       EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext4_file_inode_operations;
                inode->i_fop = &ext4_file_operations;
@@ -2742,6 +2823,7 @@ static int ext4_do_update_inode(handle_t *handle,
        if (ei->i_state & EXT4_STATE_NEW)
                memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
 
+       ext4_get_inode_flags(ei);
        raw_inode->i_mode = cpu_to_le16(inode->i_mode);
        if(!(test_opt(inode->i_sb, NO_UID32))) {
                raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
@@ -2769,9 +2851,12 @@ static int ext4_do_update_inode(handle_t *handle,
        }
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
        raw_inode->i_size = cpu_to_le32(ei->i_disksize);
-       raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
-       raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
-       raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+
+       EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+       EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
+       EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
+       EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
+
        raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
        raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
        raw_inode->i_flags = cpu_to_le32(ei->i_flags);
@@ -2884,7 +2969,7 @@ int ext4_write_inode(struct inode *inode, int wait)
                return 0;
 
        if (ext4_journal_current_handle()) {
-               jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+               jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
                dump_stack();
                return -EIO;
        }
@@ -3079,6 +3164,39 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
        return err;
 }
 
+/*
+ * Expand an inode by new_extra_isize bytes.
+ * Returns 0 on success or negative error number on failure.
+ */
+int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize,
+                       struct ext4_iloc iloc, handle_t *handle)
+{
+       struct ext4_inode *raw_inode;
+       struct ext4_xattr_ibody_header *header;
+       struct ext4_xattr_entry *entry;
+
+       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+               return 0;
+
+       raw_inode = ext4_raw_inode(&iloc);
+
+       header = IHDR(inode, raw_inode);
+       entry = IFIRST(header);
+
+       /* No extended attributes present */
+       if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) ||
+               header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
+               memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
+                       new_extra_isize);
+               EXT4_I(inode)->i_extra_isize = new_extra_isize;
+               return 0;
+       }
+
+       /* try to expand with EAs present */
+       return ext4_expand_extra_isize_ea(inode, new_extra_isize,
+                                         raw_inode, handle);
+}
+
 /*
  * What we do here is to mark the in-core inode as clean with respect to inode
  * dirtiness (it may still be data-dirty).
@@ -3103,10 +3221,38 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
 int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 {
        struct ext4_iloc iloc;
-       int err;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+       static unsigned int mnt_count;
+       int err, ret;
 
        might_sleep();
        err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+           !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
+               /*
+                * We need extra buffer credits since we may write into EA block
+                * with this same handle. If journal_extend fails, then it will
+                * only result in a minor loss of functionality for that inode.
+                * If this is felt to be critical, then e2fsck should be run to
+                * force a large enough s_min_extra_isize.
+                */
+               if ((jbd2_journal_extend(handle,
+                            EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
+                       ret = ext4_expand_extra_isize(inode,
+                                                     sbi->s_want_extra_isize,
+                                                     iloc, handle);
+                       if (ret) {
+                               EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
+                               if (mnt_count != sbi->s_es->s_mnt_count) {
+                                       ext4_warning(inode->i_sb, __FUNCTION__,
+                                       "Unable to expand inode %lu. Delete"
+                                       " some EAs or run e2fsck.",
+                                       inode->i_ino);
+                                       mnt_count = sbi->s_es->s_mnt_count;
+                               }
+                       }
+               }
+       }
        if (!err)
                err = ext4_mark_iloc_dirty(handle, inode, &iloc);
        return err;
@@ -3195,7 +3341,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
         */
 
        journal = EXT4_JOURNAL(inode);
-       if (is_journal_aborted(journal) || IS_RDONLY(inode))
+       if (is_journal_aborted(journal))
                return -EROFS;
 
        jbd2_journal_lock_updates(journal);