2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
10 #include <linux/spinlock.h>
11 #include <linux/completion.h>
12 #include <linux/buffer_head.h>
13 #include <linux/blkdev.h>
14 #include <linux/gfs2_ondisk.h>
15 #include <linux/crc32.h>
16 #include <linux/iomap.h>
31 #include "trace_gfs2.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
	/* NOTE(review): the enclosing "struct metapath {" line appears to be
	   elided from this extraction; the fields below are its members. */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; /* one buffer per tree level */
	__u16 mp_list[GFS2_MAX_META_HEIGHT]; /* pointer index used at each level */
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
45 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
47 * @dibh: the dinode buffer
48 * @block: the block number that was allocated
49 * @page: The (optional) page. This is looked up if @page is NULL
/*
 * NOTE(review): partial extraction — error-handling lines, closing braces and
 * the return path of this function are elided; only comments added here.
 */
static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;

	/* No page given, or the given page is not page 0 of the inode:
	   look up / create page 0, which holds the stuffed data. */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		/* Copy at most the stuffed-data area of the dinode block */
		u64 dsize = i_size_read(inode);

		if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
			dsize = dibh->b_size - sizeof(struct gfs2_dinode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		/* Zero the remainder of the page beyond the copied data */
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		SetPageUptodate(page);

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),

	bh = page_buffers(page);

	/* Map the page's first buffer to the newly allocated block */
	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* jdata inodes journal their data, so don't dirty the buffer here */
	if (!gfs2_is_jdata(ip))
		mark_buffer_dirty(bh);
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
106 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
107 * @ip: The GFS2 inode to unstuff
108 * @page: The (optional) page. This is looked up if the @page is NULL
110 * This routine unstuffs a dinode and returns it to a "normal" state such
111 * that the height can be grown in the traditional way.
/*
 * NOTE(review): partial extraction — declarations of error/block/n, error
 * checks, the isdir branch selecting gfs2_dir_get_new_buffer() vs
 * gfs2_unstuffer_page(), and closing braces are elided.
 */
int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	int isdir = gfs2_is_dir(ip);

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
		/* directory path: data blocks carry a meta header */
		error = gfs2_dir_get_new_buffer(ip, block, &bh);
		gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
				      dibh, sizeof(struct gfs2_dinode));
		/* regular-file path: unstuff via the page cache */
		error = gfs2_unstuffer_page(ip, dibh, block, page);

	/* Set up the pointer to the new block */
	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First (and only) pointer slot right after the dinode */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));

	/* The inode now has one level of indirection */
	di->di_height = cpu_to_be16(1);

	up_write(&ip->i_rw_mutex);
177 * find_metapath - Find path through the metadata tree
178 * @sdp: The superblock
179 * @mp: The metapath to return the result in
180 * @block: The disk block to look up
181 * @height: The pre-calculated height of the metadata tree
183 * This routine returns a struct metapath structure that defines a path
184 * through the metadata of inode "ip" to get to block "block".
187 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
188 * filesystem with a blocksize of 4096.
190 * find_metapath() would return a struct metapath structure set to:
191 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
192 * and mp_list[2] = 165.
194 * That means that in order to get to the block containing the byte at
195 * offset 101342453, we would load the indirect block pointed to by pointer
196 * 0 in the dinode. We would then load the indirect block pointed to by
197 * pointer 48 in that indirect block. We would then load the data block
198 * pointed to by pointer 165 in that indirect block.
200 * ----------------------------------------
205 * ----------------------------------------
209 * ----------------------------------------
213 * |0 5 6 7 8 9 0 1 2|
214 * ----------------------------------------
218 * ----------------------------------------
223 * ----------------------------------------
227 * ----------------------------------------
228 * | Data block containing offset |
232 * ----------------------------------------
236 static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
237 struct metapath *mp, unsigned int height)
241 mp->mp_fheight = height;
242 for (i = height; i--;)
243 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
246 static inline unsigned int metapath_branch_start(const struct metapath *mp)
248 if (mp->mp_list[0] == 0)
254 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
255 * @height: The metadata height (0 = dinode)
258 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
260 struct buffer_head *bh = mp->mp_bh[height];
262 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
263 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
267 * metapointer - Return pointer to start of metadata in a buffer
268 * @height: The metadata height (0 = dinode)
271 * Return a pointer to the block number of the next height of the metadata
272 * tree given a buffer containing the pointer to the current height of the
276 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
278 __be64 *p = metaptr1(height, mp);
279 return p + mp->mp_list[height];
282 static void gfs2_metapath_ra(struct gfs2_glock *gl,
283 const struct buffer_head *bh, const __be64 *pos)
285 struct buffer_head *rabh;
286 const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
289 for (t = pos; t < endp; t++) {
293 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
294 if (trylock_buffer(rabh)) {
295 if (!buffer_uptodate(rabh)) {
296 rabh->b_end_io = end_buffer_read_sync;
297 submit_bh(REQ_OP_READ,
298 REQ_RAHEAD | REQ_META | REQ_PRIO,
308 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
309 unsigned int x, unsigned int h)
312 __be64 *ptr = metapointer(x, mp);
313 u64 dblock = be64_to_cpu(*ptr);
318 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
322 mp->mp_aheight = x + 1;
327 * lookup_metapath - Walk the metadata tree to a specific point
331 * Assumes that the inode's buffer has already been looked up and
332 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
333 * by find_metapath().
335 * If this function encounters part of the tree which has not been
336 * allocated, it returns the current height of the tree at the point
337 * at which it found the unallocated block. Blocks which are found are
338 * added to the mp->mp_bh[] list.
343 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
345 return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
349 * fillup_metapath - fill up buffers for the metadata path to a specific height
352 * @h: The height to which it should be mapped
354 * Similar to lookup_metapath, but does lookups for a range of heights
359 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
364 /* find the first buffer we need to look up. */
365 for (x = h - 1; x > 0; x--) {
370 return __fillup_metapath(ip, mp, x, h);
373 static inline void release_metapath(struct metapath *mp)
377 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
378 if (mp->mp_bh[i] == NULL)
380 brelse(mp->mp_bh[i]);
385 * gfs2_extent_length - Returns length of an extent of blocks
386 * @start: Start of the buffer
387 * @len: Length of the buffer in bytes
388 * @ptr: Current position in the buffer
389 * @limit: Max extent length to return (0 = unlimited)
390 * @eob: Set to 1 if we hit "end of block"
392 * If the first block is zero (unallocated) it will return the number of
393 * unallocated blocks in the extent, otherwise it will return the number
394 * of contiguous blocks in the extent.
396 * Returns: The length of the extent (minimum of one block)
399 static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
401 const __be64 *end = (start + len);
402 const __be64 *first = ptr;
403 u64 d = be64_to_cpu(*ptr);
410 if (limit && --limit == 0)
414 } while(be64_to_cpu(*ptr) == d);
417 return (ptr - first);
420 static inline void bmap_lock(struct gfs2_inode *ip, int create)
423 down_write(&ip->i_rw_mutex);
425 down_read(&ip->i_rw_mutex);
428 static inline void bmap_unlock(struct gfs2_inode *ip, int create)
431 up_write(&ip->i_rw_mutex);
433 up_read(&ip->i_rw_mutex);
436 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
437 struct gfs2_glock *gl, unsigned int i,
438 unsigned offset, u64 bn)
440 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
441 ((i > 1) ? sizeof(struct gfs2_meta_header) :
442 sizeof(struct gfs2_dinode)));
444 BUG_ON(mp->mp_bh[i] != NULL);
445 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
446 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
447 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
448 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
450 *ptr = cpu_to_be64(bn);
	/* NOTE(review): the enclosing "enum alloc_state {" line (and,
	   presumably, ALLOC_DATA = 0 — referenced later by gfs2_iomap_alloc)
	   is elided from this extraction. */
	ALLOC_GROW_DEPTH = 1,	/* add indirect blocks below existing tree */
	ALLOC_GROW_HEIGHT = 2,	/* add height to the metadata tree */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
461 static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
464 return sdp->sd_inptrs;
465 return sdp->sd_diptrs;
469 * gfs2_bmap_alloc - Build a metadata tree of the requested height
470 * @inode: The GFS2 inode
471 * @lblock: The logical starting block of the extent
472 * @bh_map: This is used to return the mapping details
473 * @zero_new: True if newly allocated blocks should be zeroed
474 * @mp: The metapath, with proper height information calculated
475 * @maxlen: The max number of data blocks to alloc
476 * @dblock: Pointer to return the resulting new block
477 * @dblks: Pointer to return the number of blocks allocated
479 * In this routine we may have to alloc:
480 * i) Indirect blocks to grow the metadata tree height
481 * ii) Indirect blocks to fill in lower part of the metadata tree
484 * The function is in two parts. The first part works out the total
485 * number of blocks which we need. The second part does the actual
486 * allocation asking for an extent at a time (if enough contiguous free
487 * blocks are available, there will only be one request per bmap call)
488 * and uses the state machine to initialise the blocks in order.
490 * Returns: errno on error
/*
 * NOTE(review): partial extraction — the do/switch state-machine framing,
 * several case bodies, error checks, goto labels and closing braces of this
 * function are elided; only comments added here.
 */
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct super_block *sb = sdp->sd_vfs;
	struct buffer_head *dibh = mp->mp_bh[0];
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	unsigned ptrs_per_blk;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	enum alloc_state state;
	/* requested mapping length, in filesystem blocks */
	size_t maxlen = iomap->length >> inode->i_blkbits;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	/* Part 1: work out how many data (dblks) and indirect (iblks)
	   blocks are needed, and the starting state. */
	if (mp->mp_fheight == mp->mp_aheight) {
		struct buffer_head *bh;

		/* Bottom indirect block exists, find unalloced extent size */
		ptr = metapointer(end_of_metadata, mp);
		bh = mp->mp_bh[end_of_metadata];
		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,

		/* Need to allocate indirect blocks */
		ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
		dblks = min(maxlen, (size_t)(ptrs_per_blk -
					     mp->mp_list[end_of_metadata]));
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;

			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		/* Metadata (and jdata data) blocks need revoke protection */
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);

		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				/* Move the old top-of-tree pointers into the
				   new top indirect block */
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
						 sizeof(struct gfs2_meta_header));
				state = ALLOC_GROW_DEPTH;
				/* release branch buffers; they are re-read
				   below via the branch path */
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
					brelse(mp->mp_bh[i]);

		/* Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)

		/* Tree complete, adding data blocks */
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_NEW;
				*ptr++ = cpu_to_be64(bn++);
			if (flags & IOMAP_ZERO) {
				ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
					"Failed to zero data buffers\n");
				/* only attempt zeroing once */
				flags &= ~IOMAP_ZERO;

	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
635 * hole_size - figure out the size of a hole
637 * @lblock: The logical starting block number
640 * Returns: The hole size in bytes
/*
 * NOTE(review): partial extraction — local declarations (hgt, done, factor,
 * zeroptrs, holesz), loop bodies and braces of this function are elided;
 * only comments added here.
 */
static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct metapath mp_eof;
	const __be64 *first, *end, *ptr;
	const struct buffer_head *bh;
	/* last in-use logical block of the file */
	u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;

	/* Get another metapath, to the very last byte */
	find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
	/* Walk from the bottom of the tree upward, counting zero pointers */
	for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
		first = metapointer(hgt, mp);
		end = (const __be64 *)(bh->b_data + bh->b_size);
		for (ptr = first; ptr < end; ptr++) {
			zeroptrs = sdp->sd_inptrs;
		/* Each zero pointer at this height covers `factor` blocks */
		if (factor * zeroptrs >= lblock_stop - lblock + 1) {
			/* hole runs to end of file — clamp it */
			holesz = lblock_stop - lblock + 1;
			holesz += factor * zeroptrs;
		factor *= sdp->sd_inptrs;
		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
			(mp->mp_list[hgt - 1])++;
	/* convert blocks to bytes */
	return holesz << inode->i_blkbits;
690 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
692 struct gfs2_inode *ip = GFS2_I(inode);
694 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
695 sizeof(struct gfs2_dinode);
697 iomap->length = i_size_read(inode);
698 iomap->type = IOMAP_MAPPED;
699 iomap->flags = IOMAP_F_DATA_INLINE;
703 * gfs2_iomap_begin - Map blocks from an inode to disk blocks
705 * @pos: Starting position in bytes
706 * @length: Length to map, in bytes
707 * @flags: iomap flags
708 * @iomap: The iomap structure
/*
 * NOTE(review): partial extraction — local declarations (ret, lblock, lend,
 * height, len, eob, ptr), goto labels (the release/trace/hole-handling tail
 * visible below appears out of order because its labels are elided) and
 * closing braces are missing; only comments added here.
 */
int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		     unsigned flags, struct iomap *iomap)
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct metapath mp = { .mp_aheight = 1, };
	unsigned int factor = sdp->sd_sb.sb_bsize;
	const u64 *arr = sdp->sd_heightsize;
	struct buffer_head *bh;

	trace_gfs2_iomap_start(ip, pos, length, flags);

	/* Stuffed inodes are reported inline, no tree walk needed */
	if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) {
		gfs2_stuffed_iomap(inode, iomap);
		if (pos >= iomap->length)

	lblock = pos >> inode->i_blkbits;
	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;

	/* Default answer: a hole covering the requested range */
	iomap->offset = lblock << inode->i_blkbits;
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
	iomap->flags = IOMAP_F_MERGED;

	/*
	 * Directory data blocks have a struct gfs2_meta_header header, so the
	 * remaining size is smaller than the filesystem block size.  Logical
	 * block numbers for directories are in units of this remaining size!
	 */
	if (gfs2_is_dir(ip)) {
		factor = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);

	/* Tree height needed to address lblock */
	height = ip->i_height;
	while ((lblock + 1) * factor > arr[height])
	find_metapath(sdp, lblock, &mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))

	ret = lookup_metapath(ip, &mp);
	/* A shorter actual height means we hit an unallocated block (hole) */
	if (mp.mp_aheight != ip->i_height)

	ptr = metapointer(ip->i_height - 1, &mp);

	iomap->type = IOMAP_MAPPED;
	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;

	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
		iomap->flags |= IOMAP_F_BOUNDARY;
	iomap->length = (u64)len << inode->i_blkbits;

	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);

	/* Hole handling for non-write requests */
	if (!(flags & IOMAP_WRITE)) {
		if (pos >= i_size_read(inode)) {
		iomap->length = hole_size(inode, lblock, &mp);
	/* Write into a hole: allocate the blocks */
	ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
819 * gfs2_block_map - Map a block from an inode to a disk block
821 * @lblock: The logical block number
822 * @bh_map: The bh to be mapped
823 * @create: True if its ok to alloc blocks to satify the request
825 * Sets buffer_mapped() if successful, sets buffer_boundary() if a
826 * read of metadata will be required before the next block can be
827 * mapped. Sets buffer_new() if new blocks were allocated.
/*
 * NOTE(review): partial extraction — declarations of flags/ret/iomap, the
 * create test before "flags |= IOMAP_WRITE", the IOMAP_ZERO line, the
 * error-return path and closing braces are elided; only comments added.
 */
int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
	struct gfs2_inode *ip = GFS2_I(inode);

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

		flags |= IOMAP_WRITE;
	if (buffer_zeronew(bh_map))
	/* Delegate the actual mapping to the iomap implementation */
	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
			       bh_map->b_size, flags, &iomap);
	if (!create && ret == -ENOENT) {
		/* Return unmapped buffer beyond the end of file. */

	/* Clamp the mapping to the caller's buffer size */
	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_BOUNDARY;
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	/* Translate iomap flags back into buffer_head state bits */
	if (iomap.flags & IOMAP_F_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
876 * Deprecated: do not use in new code
/*
 * NOTE(review): partial extraction — declarations of create/ret, the *new
 * assignment and the return/closing brace are elided; only comments added.
 * Deprecated wrapper mapping an extent via gfs2_block_map().
 */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };

	/* For lookups, ask for a 32-block extent in one go */
	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
	ret = gfs2_block_map(inode, lblock, &bh, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
900 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
902 * This is partly borrowed from ext3.
/*
 * NOTE(review): partial extraction — the page/err declarations, the pos
 * initialisation before the buffer walk, error/unlock/put_page paths and
 * closing braces are elided; only comments added here.
 */
static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct buffer_head *bh;

	page = find_or_create_page(mapping, index, GFP_NOFS);

	blocksize = inode->i_sb->s_blocksize;
	/* bytes to zero: from "from" to the end of its block */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	while (offset >= pos) {
		bh = bh->b_this_page;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))

	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_data(ip->i_gl, bh);

	/* Zero the partial block's tail and dirty the buffer */
	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
969 #define GFS2_JTRUNC_REVOKES 8192
972 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
973 * @inode: The inode being truncated
974 * @oldsize: The original (larger) size
975 * @newsize: The new smaller size
977 * With jdata files, we have to journal a revoke for each block which is
978 * truncated. As a result, we need to split this into separate transactions
979 * if the number of pages being truncated gets too large.
/*
 * NOTE(review): partial extraction — declarations of chunk/offs/error,
 * the chunk clamp to max_chunk, oldsize update, loop-exit/error paths and
 * closing braces are elided; only comments added here.
 */
static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	/* Cap each step so its revokes fit in one transaction */
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)

		/* Align each step on a page boundary after a partial page */
		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))

		/* End this transaction and open a fresh one per chunk */
		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
/*
 * NOTE(review): partial extraction — the error declaration, the journaled
 * branch selecting between the two gfs2_trans_begin() calls, error checks,
 * the journaled vs. writeback truncate_pagecache branch, brelse/out paths
 * and closing braces are elided; only comments added here.
 */
static int trunc_start(struct inode *inode, u64 newsize)
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;

		/* jdata needs revoke room for journaled data blocks */
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);

	error = gfs2_meta_inode_buffer(ip, &dibh);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		/* Inline data: just clear everything past newsize */
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
		/* Zero the partial tail block, if any */
		if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
			error = gfs2_block_truncate_page(mapping, newsize);
		/* mark truncate in progress so it can resume after a crash */
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

		error = gfs2_journaled_truncate(inode, oldsize, newsize);
		truncate_pagecache(inode, newsize);

	if (current->journal_info)
		gfs2_trans_end(sdp);
1068 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1070 * @rg_gh: holder of resource group glock
1071 * @mp: current metapath fully populated with buffers
1072 * @btotal: place to keep count of total blocks freed
1073 * @hgt: height we're processing
1074 * @first: true if this is the first call to this function for this height
1076 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1077 * free, and free them all. However, we do it one rgrp at a time. If this
1078 * block has references to multiple rgrps, we break it into individual
1079 * transactions. This allows other processes to use the rgrps while we're
1080 * focused on a single one, for better concurrency / performance.
1081 * At every transaction boundary, we rewrite the inode into the journal.
1082 * That way the bitmaps are kept consistent with the inode and we can recover
1083 * if we're interrupted by power-outages.
1085 * Returns: 0, or return code if an error occurred.
1086 * *btotal has the total number of blocks freed
/*
 * NOTE(review): partial extraction — the "bool *first" tail of the
 * parameter list, ret declaration, the more_rgrps label, else branches,
 * goto targets, pointer-zeroing statements, bstart/blen bookkeeping and
 * closing braces are elided; only comments added here.
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      const struct metapath *mp, u32 *btotal, int hgt,
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	struct buffer_head *bh = mp->mp_bh[hgt];
	__be64 *top, *bottom, *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int meta = ((hgt != ip->i_height - 1) ? 1 : 0);
	bool buf_in_tr = false; /* buffer was added to transaction */

	/* sanity: dinode at the top, indirect blocks below */
	if (gfs2_metatype_check(sdp, bh,
				(hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)))

	blks_outside_rgrp = 0;

	top = metapointer(hgt, mp); /* first ptr from metapath */
	/* If we're keeping some data at the truncation point, we've got to
	   preserve the metadata tree by adding 1 to the starting metapath. */

	bottom = (__be64 *)(bh->b_data + bh->b_size);

	for (p = top; p < bottom; p++) {
		bn = be64_to_cpu(*p);
		/* Reuse the rgrp glock we already hold, if any */
		if (gfs2_holder_initialized(rd_gh)) {
			rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
			gfs2_assert_withdraw(sdp,
				     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);

		/* Block belongs to another rgrp; handle it on a later pass */
		if (!rgrp_contains_block(rgd, bn)) {
			blks_outside_rgrp++;

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
					atomic_read(&sdp->sd_log_thresh2);
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
				revokes += hptrs(sdp, hgt);
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			down_write(&ip->i_rw_mutex);

		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			gfs2_trans_add_meta(ip->i_gl, bh);

		/* Extend the run of contiguous blocks being freed */
		if (bstart + blen == bn) {
			__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
			gfs2_add_inode_blocks(&ip->i_inode, -blen);

	/* Free any final pending run of blocks */
		__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
		gfs2_add_inode_blocks(&ip->i_inode, -blen);

	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh = mp->mp_bh[0];

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);

		gfs2_glock_dq_uninit(rd_gh);
1242 * find_nonnull_ptr - find a non-null pointer given a metapath and height
1243 * assumes the metapath is valid (with buffers) out to height h
1244 * @mp: starting metapath
1245 * @h: desired height to search
1247 * Returns: true if a non-null pointer was found in the metapath buffer
1248 * false if all remaining pointers are NULL in the buffer
1250 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1254 unsigned int ptrs = hptrs(sdp, h) - 1;
1257 ptr = metapointer(h, mp);
1258 if (*ptr) { /* if we have a non-null pointer */
1259 /* Now zero the metapath after the current height. */
1261 if (h < GFS2_MAX_META_HEIGHT)
1262 memset(&mp->mp_list[h], 0,
1263 (GFS2_MAX_META_HEIGHT - h) *
1264 sizeof(mp->mp_list[0]));
1268 if (mp->mp_list[h] < ptrs)
1271 return false; /* no more pointers in this buffer */
/* States of the trunc_dealloc() state machine.  Restored: the closing
 * "};", elided in this extraction. */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,  /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,     /* process complete */
};
1282 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h)
1284 if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0])))
1290 * trunc_dealloc - truncate a file down to a desired size
1291 * @ip: inode to truncate
1292 * @newsize: The desired size of the file
1294 * This function truncates a file to newsize. It works from the
1295 * bottom up, and from the right to the left. In other words, it strips off
1296 * the highest layer (data) before stripping any of the metadata. Doing it
1297 * this way is best in case the operation is interrupted by power failure, etc.
1298 * The dinode is rewritten in every transaction to guarantee integrity.
/* NOTE(review): extraction has dropped statements from this function (the
 * embedded original line numbers are non-contiguous); the comments below
 * describe only what the visible statements establish. */
1300 static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1302 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1304 struct buffer_head *dibh, *bh;
/* Holder for the resource-group glock taken while sweeping blocks. */
1305 struct gfs2_holder rd_gh;
1307 __u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */
/* Start stripping one level below the inode height (the data layer). */
1308 unsigned int strip_h = ip->i_height - 1;
1311 int mp_h; /* metapath buffers are read in to this height */
/* Block number of the last buffer we issued read-ahead for; used to
 * avoid re-issuing read-ahead on the same indirect block. */
1312 sector_t last_ra = 0;
1314 bool preserve1; /* need to preserve the first meta pointer? */
/* Last logical block to keep; newsize == 0 would underflow here, so a
 * dropped line presumably special-cases that -- TODO confirm. */
1319 lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift;
1321 memset(&mp, 0, sizeof(mp));
/* Compute the per-height pointer indices for lblock. */
1322 find_metapath(sdp, lblock, &mp, ip->i_height);
/* Remember the metapath at the new end of file: pointers at or before
 * it must survive the truncate. */
1324 memcpy(&nbof, &mp.mp_list, sizeof(nbof));
1326 ret = gfs2_meta_inode_buffer(ip, &dibh);
1331 ret = lookup_metapath(ip, &mp);
/* Initial state depends on whether every height resolved to a buffer. */
1334 if (mp.mp_aheight == ip->i_height)
1335 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1337 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1339 ret = gfs2_rindex_update(sdp);
1343 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
/* rd_gh is acquired lazily by the sweep; mark it unheld for cleanup. */
1346 gfs2_holder_mark_uninitialized(&rd_gh);
/* State machine: MP_FULL sweeps pointers at the strip height, MP_LOWER
 * backs up/advances the path, FILL_MP re-reads buffers for the path. */
1350 while (state != DEALLOC_DONE) {
1352 /* Truncate a full metapath at the given strip height.
1353 * Note that strip_h == mp_h in order to be in this state. */
1354 case DEALLOC_MP_FULL:
1355 if (mp_h > 0) { /* issue read-ahead on metadata */
1358 bh = mp.mp_bh[mp_h - 1];
/* Only read ahead once per parent indirect block. */
1359 if (bh->b_blocknr != last_ra) {
1360 last_ra = bh->b_blocknr;
1361 top = metaptr1(mp_h - 1, &mp);
1362 gfs2_metapath_ra(ip->i_gl, bh, top);
1365 /* If we're truncating to a non-zero size and the mp is
1366 at the beginning of file for the strip height, we
1367 need to preserve the first metadata pointer. */
1368 preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
1369 bh = mp.mp_bh[mp_h];
1370 gfs2_assert_withdraw(sdp, bh);
/* Sweeping the same block twice in a row means the state machine is
 * stuck; log loudly (and withdraw) rather than loop forever. */
1371 if (gfs2_assert_withdraw(sdp,
1372 prev_bnr != bh->b_blocknr)) {
1373 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1374 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1376 (unsigned long long)ip->i_no_addr,
1377 prev_bnr, ip->i_height, strip_h, mp_h);
1379 prev_bnr = bh->b_blocknr;
/* Free the pointers in this buffer; btotal accumulates freed blocks. */
1380 ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal,
1382 /* If we hit an error or just swept dinode buffer,
1385 state = DEALLOC_DONE;
1388 state = DEALLOC_MP_LOWER;
1391 /* lower the metapath strip height */
1392 case DEALLOC_MP_LOWER:
1393 /* We're done with the current buffer, so release it,
1394 unless it's the dinode buffer. Then back up to the
1395 previous pointer. */
1397 brelse(mp.mp_bh[mp_h]);
1398 mp.mp_bh[mp_h] = NULL;
1400 /* If we can't get any lower in height, we've stripped
1401 off all we can. Next step is to back up and start
1402 stripping the previous level of metadata. */
1405 memcpy(&mp.mp_list, &nbof, sizeof(nbof));
1407 state = DEALLOC_FILL_MP;
1410 mp.mp_list[mp_h] = 0;
1411 mp_h--; /* search one metadata height down */
1412 if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1)
1413 break; /* loop around in the same state */
1415 /* Here we've found a part of the metapath that is not
1416 * allocated. We need to search at that height for the
1417 * next non-null pointer. */
1418 if (find_nonnull_ptr(sdp, &mp, mp_h)) {
1419 state = DEALLOC_FILL_MP;
1422 /* No more non-null pointers at this height. Back up
1423 to the previous height and try again. */
1424 break; /* loop around in the same state */
1426 /* Fill the metapath with buffers to the given height. */
1427 case DEALLOC_FILL_MP:
1428 /* Fill the buffers out to the current height. */
1429 ret = fillup_metapath(ip, &mp, mp_h);
1433 /* If buffers found for the entire strip height */
1434 if (mp.mp_aheight - 1 == strip_h) {
1435 state = DEALLOC_MP_FULL;
1438 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1439 mp_h = mp.mp_aheight - 1;
1441 /* If we find a non-null block pointer, crawl a bit
1442 higher up in the metapath and try again, otherwise
1443 we need to look lower for a new starting point. */
1444 if (find_nonnull_ptr(sdp, &mp, mp_h))
1447 state = DEALLOC_MP_LOWER;
/* Loop done: open a transaction if one isn't already active, then flush
 * the accumulated statfs/quota deltas and rewrite the dinode. */
1453 if (current->journal_info == NULL) {
1454 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1458 down_write(&ip->i_rw_mutex);
1460 gfs2_statfs_change(sdp, 0, +btotal, 0);
/* Credit the freed blocks back against this inode's quota. */
1461 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1463 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1464 gfs2_trans_add_meta(ip->i_gl, dibh);
1465 gfs2_dinode_out(ip, dibh->b_data);
1466 up_write(&ip->i_rw_mutex);
1467 gfs2_trans_end(sdp);
/* Cleanup: drop the rgrp glock, close any transaction still open,
 * release the quota hold and the metapath buffers. */
1471 if (gfs2_holder_initialized(&rd_gh))
1472 gfs2_glock_dq_uninit(&rd_gh);
1473 if (current->journal_info) {
1474 up_write(&ip->i_rw_mutex);
1475 gfs2_trans_end(sdp);
1478 gfs2_quota_unhold(ip);
1480 release_metapath(&mp);
/* trunc_end - finish a truncate: clear the in-progress flag and rewrite
 * the dinode under a small transaction.  NOTE(review): extraction dropped
 * some lines here (error checks/braces); comments cover visible code. */
1484 static int trunc_end(struct gfs2_inode *ip)
1486 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1487 struct buffer_head *dibh;
/* Only the dinode block is modified, so reserve just RES_DINODE. */
1490 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1494 down_write(&ip->i_rw_mutex);
1496 error = gfs2_meta_inode_buffer(ip, &dibh);
/* File shrank to zero: reset the allocation goal to the inode's own
 * address and wipe the inline data area after the dinode header. */
1500 if (!i_size_read(&ip->i_inode)) {
1502 ip->i_goal = ip->i_no_addr;
1503 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1504 gfs2_ordered_del_inode(ip);
1506 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
/* The truncate is complete; clear the flag that marks it in progress. */
1507 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1509 gfs2_trans_add_meta(ip->i_gl, dibh);
1510 gfs2_dinode_out(ip, dibh->b_data);
1514 up_write(&ip->i_rw_mutex);
1515 gfs2_trans_end(sdp);
1520 * do_shrink - make a file smaller
1522 * @newsize: the size to make the file
1524 * Called with an exclusive lock on @inode. The @newsize must
1525 * be equal to or smaller than the current inode size.
1530 static int do_shrink(struct inode *inode, u64 newsize)
1532 struct gfs2_inode *ip = GFS2_I(inode);
/* Step 1: truncate the tail (partial block / inline data) first. */
1535 error = trunc_start(inode, newsize);
/* A stuffed (inline) inode has no indirect blocks to deallocate, so the
 * dealloc/end steps below are presumably skipped -- the early return sits
 * on a dropped line. */
1538 if (gfs2_is_stuffed(ip))
/* Step 2: free metadata/data blocks beyond newsize. */
1541 error = trunc_dealloc(ip, newsize);
/* Step 3: finalize the dinode (clears GFS2_DIF_TRUNC_IN_PROG). */
1543 error = trunc_end(ip);
/* gfs2_trim_blocks - release blocks allocated beyond the current i_size
 * by "shrinking" the file to its own size. */
1548 void gfs2_trim_blocks(struct inode *inode)
1552 ret = do_shrink(inode, inode->i_size);
1557 * do_grow - Touch and update inode size
1559 * @size: The new size
1561 * This function updates the timestamps on the inode and
1562 * may also increase the size of the inode. This function
1563 must not be called with @size any smaller than the current inode size.
1566 * Although it is not strictly required to unstuff files here,
1567 * earlier versions of GFS2 have a bug in the stuffed file reading
1568 * code which will result in a buffer overrun if the size is larger
1569 * than the max stuffed file size. In order to prevent this from
1570 * occurring, such files are unstuffed, but in other cases we can
1571 * just update the inode size directly.
1573 * Returns: 0 on success, or -ve on error
1576 static int do_grow(struct inode *inode, u64 size)
1578 struct gfs2_inode *ip = GFS2_I(inode);
1579 struct gfs2_sbd *sdp = GFS2_SB(inode);
/* One block is enough for the unstuff case below. */
1580 struct gfs2_alloc_parms ap = { .target = 1, };
1581 struct buffer_head *dibh;
/* New size no longer fits inline in the dinode block: the inode must be
 * unstuffed, which needs quota and resource-group reservations first. */
1585 if (gfs2_is_stuffed(ip) &&
1586 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1587 error = gfs2_quota_lock_check(ip, &ap);
1591 error = gfs2_inplace_reserve(ip, &ap);
1593 goto do_grow_qunlock;
/* Transaction sized for the dinode update plus extras depending on the
 * quota mode -- NOTE(review): the ternary's arms sit on dropped lines. */
1597 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1598 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1601 goto do_grow_release;
/* Convert inline data into a real data block (unstuff case only). */
1604 error = gfs2_unstuff_dinode(ip, NULL);
1609 error = gfs2_meta_inode_buffer(ip, &dibh);
/* Commit the new size and timestamps into the on-disk dinode. */
1613 i_size_write(inode, size);
1614 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1615 gfs2_trans_add_meta(ip->i_gl, dibh);
1616 gfs2_dinode_out(ip, dibh->b_data);
1620 gfs2_trans_end(sdp);
/* Unwind labels: release the reservation and quota lock taken above. */
1623 gfs2_inplace_release(ip);
1625 gfs2_quota_unlock(ip);
1631 * gfs2_setattr_size - make a file a given size
1633 * @newsize: the size to make the file
1635 * The file size can grow, shrink, or stay the same size. This
1636 * is called holding i_mutex and an exclusive glock on the inode
1642 int gfs2_setattr_size(struct inode *inode, u64 newsize)
1644 struct gfs2_inode *ip = GFS2_I(inode);
/* Only regular files may be resized through this path. */
1647 BUG_ON(!S_ISREG(inode->i_mode));
/* Generic VFS validation of the new size (limits, s_maxbytes, etc.). */
1649 ret = inode_newsize_ok(inode, newsize);
/* Drain in-flight direct I/O before changing the file size. */
1653 inode_dio_wait(inode);
1655 ret = gfs2_rsqa_alloc(ip);
/* Growing (or keeping) the size and shrinking take different paths. */
1659 if (newsize >= inode->i_size) {
1660 ret = do_grow(inode, newsize);
1664 ret = do_shrink(inode, newsize);
/* Drop the per-inode reservation/quota structures allocated above. */
1666 gfs2_rsqa_delete(ip, NULL);
/* gfs2_truncatei_resume - resume an interrupted truncate: deallocate
 * everything beyond the current on-disk size, then finalize the dinode. */
1670 int gfs2_truncatei_resume(struct gfs2_inode *ip)
1673 error = trunc_dealloc(ip, i_size_read(&ip->i_inode));
1675 error = trunc_end(ip);
/* gfs2_file_dealloc - free every block of a file by truncating to zero. */
1679 int gfs2_file_dealloc(struct gfs2_inode *ip)
1681 return trunc_dealloc(ip, 0);
1685 * gfs2_free_journal_extents - Free cached journal bmap info
1690 void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1692 struct gfs2_journal_extent *jext;
/* Pop each cached extent off the list until empty; the matching kfree
 * presumably sits on a dropped line -- TODO confirm. */
1694 while(!list_empty(&jd->extent_list)) {
1695 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1696 list_del(&jext->list);
1702 * gfs2_add_jextent - Add or merge a new extent to extent cache
1703 * @jd: The journal descriptor
1704 * @lblock: The logical block at start of new extent
1705 * @dblock: The physical block at start of new extent
1706 * @blocks: Size of extent in fs blocks
1708 * Returns: 0 on success or -ENOMEM
1711 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1713 struct gfs2_journal_extent *jext;
/* Merge path: if the new range is physically contiguous with the last
 * cached extent, just grow that extent instead of allocating. */
1715 if (!list_empty(&jd->extent_list)) {
1716 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1717 if ((jext->dblock + jext->blocks) == dblock) {
1718 jext->blocks += blocks;
/* Otherwise allocate a fresh, zeroed extent and append it to the
 * cache (kzalloc failure check sits on a dropped line). */
1723 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1726 jext->dblock = dblock;
1727 jext->lblock = lblock;
1728 jext->blocks = blocks;
1729 list_add_tail(&jext->list, &jd->extent_list);
1735 * gfs2_map_journal_extents - Cache journal bmap info
1736 * @sdp: The super block
1737 * @jd: The journal to map
1739 * Create a reusable "extent" mapping from all logical
1740 * blocks to all physical blocks for the given journal. This will save
1741 * us time when writing journal blocks. Most journals will have only one
1742 * extent that maps all their logical blocks. That's because gfs2.mkfs
1743 * arranges the journal blocks sequentially to maximize performance.
1744 * So the extent would map the first block for the entire file length.
1745 * However, gfs2_jadd can happen while file activity is happening, so
1746 * those journals may not be sequential. Less likely is the case where
1747 * the users created their own journals by mounting the metafs and
1748 * laying it out. But it's still possible. These journals might have
1751 * Returns: 0 on success, or error on failure
1754 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1758 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
/* On-stack buffer_head used purely as a gfs2_block_map result carrier;
 * it never caches real page data. */
1759 struct buffer_head bh;
1760 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
/* Map from the first logical block through the end of the journal. */
1764 lblock_stop = i_size_read(jd->jd_inode) >> shift;
1765 size = (lblock_stop - lblock) << shift;
/* The extent cache must be empty before (re)building it. */
1767 WARN_ON(!list_empty(&jd->extent_list));
/* Walk the journal's logical blocks, adding or merging one cached
 * extent per mapped range. */
1773 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1774 if (rc || !buffer_mapped(&bh))
1776 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
/* gfs2_block_map can return a multi-block mapping via bh.b_size;
 * advance by however many fs blocks were actually mapped. */
1780 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1783 fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
/* Error path: report where the mapping failed, dump the bmap result,
 * and drop any partially built extent cache. */
1788 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1790 (unsigned long long)(i_size_read(jd->jd_inode) - size),
1792 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1793 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1794 bh.b_state, (unsigned long long)bh.b_size);
1795 gfs2_free_journal_extents(jd);
1800 * gfs2_write_alloc_required - figure out if a write will require an allocation
1801 * @ip: the file being written to
1802 * @offset: the offset to write to
1803 * @len: the number of bytes being written
1805 * Returns: 1 if an alloc is required, 0 otherwise
1808 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1811 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1812 struct buffer_head bh;
1814 u64 lblock, lblock_stop, size;
1820 if (gfs2_is_stuffed(ip)) {
1822 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1827 shift = sdp->sd_sb.sb_bsize_shift;
1828 BUG_ON(gfs2_is_dir(ip));
1829 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
1830 lblock = offset >> shift;
1831 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1832 if (lblock_stop > end_of_file)
1835 size = (lblock_stop - lblock) << shift;
1839 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1840 if (!buffer_mapped(&bh))
1843 lblock += (bh.b_size >> ip->i_inode.i_blkbits);