fs/gfs2/bmap.c

   1 /*
   2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4  *
   5  * This copyrighted material is made available to anyone wishing to use,
   6  * modify, copy, or redistribute it subject to the terms and conditions
   7  * of the GNU General Public License version 2.
   8  */
   9
  10 #include <linux/spinlock.h>
  11 #include <linux/completion.h>
  12 #include <linux/buffer_head.h>
  13 #include <linux/blkdev.h>
  14 #include <linux/gfs2_ondisk.h>
  15 #include <linux/crc32.h>
  16 #include <linux/iomap.h>
  17
  18 #include "gfs2.h"
  19 #include "incore.h"
  20 #include "bmap.h"
  21 #include "glock.h"
  22 #include "inode.h"
  23 #include "meta_io.h"
  24 #include "quota.h"
  25 #include "rgrp.h"
  26 #include "log.h"
  27 #include "super.h"
  28 #include "trans.h"
  29 #include "dir.h"
  30 #include "util.h"
  31 #include "trace_gfs2.h"
  32
  33 /* Describes a path through an inode's metadata tree, one slot per level.
  34  * mp_list[] doesn't need to be that large: the max number of 64 bit
  35  * pointers in a 4k block is 512, so __u16 is fine for that. It saves
  36  * stack space to keep it small.
     */
  37 struct metapath {
  38         struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; /* buffer held at each height; [0] is the dinode */
  39         __u16 mp_list[GFS2_MAX_META_HEIGHT]; /* index of the pointer to follow at each height */
  40         int mp_fheight; /* find_metapath height */
  41         int mp_aheight; /* actual height (lookup height), set by __fillup_metapath() */
  42 };
  43
  44 /**
  45  * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  46  * @ip: the inode
  47  * @dibh: the dinode buffer
  48  * @block: the block number that was allocated
  49  * @page: The (optional) page. This is looked up if @page is NULL
  50  *
  51  * Returns: errno
  52  */
  53
  54 static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
  55                                u64 block, struct page *page)
  56 {
  57         struct inode *inode = &ip->i_inode;
  58         struct buffer_head *bh;
  59         int release = 0;
  60
  61         if (!page || page->index) {
  62                 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
  63                 if (!page)
  64                         return -ENOMEM;
  65                 release = 1;
  66         }
  67
  68         if (!PageUptodate(page)) {
  69                 void *kaddr = kmap(page);
  70                 u64 dsize = i_size_read(inode);
  71
  72                 if (dsize > gfs2_max_stuffed_size(ip))
  73                         dsize = gfs2_max_stuffed_size(ip);
  74
  75                 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
  76                 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
  77                 kunmap(page);
  78
  79                 SetPageUptodate(page);
  80         }
  81
  82         if (!page_has_buffers(page))
  83                 create_empty_buffers(page, BIT(inode->i_blkbits),
  84                                      BIT(BH_Uptodate));
  85
  86         bh = page_buffers(page);
  87
  88         if (!buffer_mapped(bh))
  89                 map_bh(bh, inode->i_sb, block);
  90
  91         set_buffer_uptodate(bh);
  92         if (!gfs2_is_jdata(ip))
  93                 mark_buffer_dirty(bh);
  94         if (!gfs2_is_writeback(ip))
  95                 gfs2_trans_add_data(ip->i_gl, bh);
  96
  97         if (release) {
  98                 unlock_page(page);
  99                 put_page(page);
 100         }
 101
 102         return 0;
 103 }
 104
 105 /**
 106  * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 107  * @ip: The GFS2 inode to unstuff
 108  * @page: The (optional) page. This is looked up if the @page is NULL
 109  *
 110  * This routine unstuffs a dinode and returns it to a "normal" state such
 111  * that the height can be grown in the traditional way.
 112  *
 113  * Returns: errno
 114  */
 115
 116 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 117 {
 118         struct buffer_head *bh, *dibh;
 119         struct gfs2_dinode *di;
 120         u64 block = 0;
 121         int isdir = gfs2_is_dir(ip);
 122         int error;
 123
 124         down_write(&ip->i_rw_mutex);
 125
 126         error = gfs2_meta_inode_buffer(ip, &dibh);
 127         if (error)
 128                 goto out;
 129
 130         if (i_size_read(&ip->i_inode)) {
 131                 /* Get a free block, fill it with the stuffed data,
 132                    and write it out to disk */
 133
 134                 unsigned int n = 1;
 135                 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
 136                 if (error)
 137                         goto out_brelse;
 138                 if (isdir) {
 139                         gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 140                         error = gfs2_dir_get_new_buffer(ip, block, &bh);
 141                         if (error)
 142                                 goto out_brelse;
 143                         gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
 144                                               dibh, sizeof(struct gfs2_dinode));
 145                         brelse(bh);
 146                 } else {
 147                         error = gfs2_unstuffer_page(ip, dibh, block, page);
 148                         if (error)
 149                                 goto out_brelse;
 150                 }
 151         }
 152
 153         /*  Set up the pointer to the new block  */
 154
 155         gfs2_trans_add_meta(ip->i_gl, dibh);
 156         di = (struct gfs2_dinode *)dibh->b_data;
 157         gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 158
 159         if (i_size_read(&ip->i_inode)) {
 160                 *(__be64 *)(di + 1) = cpu_to_be64(block);
 161                 gfs2_add_inode_blocks(&ip->i_inode, 1);
 162                 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 163         }
 164
 165         ip->i_height = 1;
 166         di->di_height = cpu_to_be16(1);
 167
 168 out_brelse:
 169         brelse(dibh);
 170 out:
 171         up_write(&ip->i_rw_mutex);
 172         return error;
 173 }
 174
 175
 176 /**
 177  * find_metapath - Find path through the metadata tree
 178  * @sdp: The superblock
 179  * @mp: The metapath to return the result in
 180  * @block: The disk block to look up
 181  * @height: The pre-calculated height of the metadata tree
 182  *
 183  *   This routine returns a struct metapath structure that defines a path
 184  *   through the metadata of inode "ip" to get to block "block".
 185  *
 186  *   Example:
 187  *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 188  *   filesystem with a blocksize of 4096.
 189  *
 190  *   find_metapath() would return a struct metapath structure set to:
 191  *   mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
 192  *   and mp_list[2] = 165.
 193  *
 194  *   That means that in order to get to the block containing the byte at
 195  *   offset 101342453, we would load the indirect block pointed to by pointer
 196  *   0 in the dinode.  We would then load the indirect block pointed to by
 197  *   pointer 48 in that indirect block.  We would then load the data block
 198  *   pointed to by pointer 165 in that indirect block.
 199  *
 200  *             ----------------------------------------
 201  *             | Dinode |                             |
 202  *             |        |                            4|
 203  *             |        |0 1 2 3 4 5                 9|
 204  *             |        |                            6|
 205  *             ----------------------------------------
 206  *                       |
 207  *                       |
 208  *                       V
 209  *             ----------------------------------------
 210  *             | Indirect Block                       |
 211  *             |                                     5|
 212  *             |            4 4 4 4 4 5 5            1|
 213  *             |0           5 6 7 8 9 0 1            2|
 214  *             ----------------------------------------
 215  *                                |
 216  *                                |
 217  *                                V
 218  *             ----------------------------------------
 219  *             | Indirect Block                       |
 220  *             |                         1 1 1 1 1   5|
 221  *             |                         6 6 6 6 6   1|
 222  *             |0                        3 4 5 6 7   2|
 223  *             ----------------------------------------
 224  *                                           |
 225  *                                           |
 226  *                                           V
 227  *             ----------------------------------------
 228  *             | Data block containing offset         |
 229  *             |            101342453                 |
 230  *             |                                      |
 231  *             |                                      |
 232  *             ----------------------------------------
 233  *
 234  */
 235
 236 static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 237                           struct metapath *mp, unsigned int height)
 238 {
 239         unsigned int i;
 240
 241         mp->mp_fheight = height;
 242         for (i = height; i--;)
 243                 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
 244 }
 245
 246 static inline unsigned int metapath_branch_start(const struct metapath *mp)
 247 {
 248         if (mp->mp_list[0] == 0)
 249                 return 2;
 250         return 1;
 251 }
 252
 253 /**
 254  * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 255  * @height: The metadata height (0 = dinode)
 256  * @mp: The metapath
 257  */
 258 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
 259 {
 260         struct buffer_head *bh = mp->mp_bh[height];
 261         if (height == 0)
 262                 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
 263         return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
 264 }
 265
 266 /**
 267  * metapointer - Return pointer to start of metadata in a buffer
 268  * @height: The metadata height (0 = dinode)
 269  * @mp: The metapath
 270  *
 271  * Return a pointer to the block number of the next height of the metadata
 272  * tree given a buffer containing the pointer to the current height of the
 273  * metadata tree.
 274  */
 275
 276 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 277 {
 278         __be64 *p = metaptr1(height, mp);
 279         return p + mp->mp_list[height];
 280 }
 281
 282 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 283 {
 284         const __be64 *t;
 285
 286         for (t = start; t < end; t++) {
 287                 struct buffer_head *rabh;
 288
 289                 if (!*t)
 290                         continue;
 291
 292                 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
 293                 if (trylock_buffer(rabh)) {
 294                         if (!buffer_uptodate(rabh)) {
 295                                 rabh->b_end_io = end_buffer_read_sync;
 296                                 submit_bh(REQ_OP_READ,
 297                                           REQ_RAHEAD | REQ_META | REQ_PRIO,
 298                                           rabh);
 299                                 continue;
 300                         }
 301                         unlock_buffer(rabh);
 302                 }
 303                 brelse(rabh);
 304         }
 305 }
 306
 307 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
 308                              unsigned int x, unsigned int h)
 309 {
 310         for (; x < h; x++) {
 311                 __be64 *ptr = metapointer(x, mp);
 312                 u64 dblock = be64_to_cpu(*ptr);
 313                 int ret;
 314
 315                 if (!dblock)
 316                         break;
 317                 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
 318                 if (ret)
 319                         return ret;
 320         }
 321         mp->mp_aheight = x + 1;
 322         return 0;
 323 }
 324
 325 /**
 326  * lookup_metapath - Walk the metadata tree to a specific point
 327  * @ip: The inode
 328  * @mp: The metapath
 329  *
 330  * Assumes that the inode's buffer has already been looked up and
 331  * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 332  * by find_metapath().
 333  *
 334  * If this function encounters part of the tree which has not been
 335  * allocated, it returns the current height of the tree at the point
 336  * at which it found the unallocated block. Blocks which are found are
 337  * added to the mp->mp_bh[] list.
 338  *
 339  * Returns: error
 340  */
 341
 342 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 343 {
 344         return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 345 }
 346
 347 /**
 348  * fillup_metapath - fill up buffers for the metadata path to a specific height
 349  * @ip: The inode
 350  * @mp: The metapath
 351  * @h: The height to which it should be mapped
 352  *
 353  * Similar to lookup_metapath, but does lookups for a range of heights
 354  *
 355  * Returns: error or the number of buffers filled
 356  */
 357
 358 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 359 {
 360         unsigned int x = 0;
 361         int ret;
 362
 363         if (h) {
 364                 /* find the first buffer we need to look up. */
 365                 for (x = h - 1; x > 0; x--) {
 366                         if (mp->mp_bh[x])
 367                                 break;
 368                 }
 369         }
 370         ret = __fillup_metapath(ip, mp, x, h);
 371         if (ret)
 372                 return ret;
 373         return mp->mp_aheight - x - 1;
 374 }
 375
 376 static inline void release_metapath(struct metapath *mp)
 377 {
 378         int i;
 379
 380         for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
 381                 if (mp->mp_bh[i] == NULL)
 382                         break;
 383                 brelse(mp->mp_bh[i]);
 384         }
 385 }
 386
 387 /**
 388  * gfs2_extent_length - Returns length of an extent of blocks
 389  * @start: Start of the buffer
 390  * @len: Length of the buffer in bytes
 391  * @ptr: Current position in the buffer
 392  * @limit: Max extent length to return (0 = unlimited)
 393  * @eob: Set to 1 if we hit "end of block"
 394  *
 395  * If the first block is zero (unallocated) it will return the number of
 396  * unallocated blocks in the extent, otherwise it will return the number
 397  * of contiguous blocks in the extent.
 398  *
 399  * Returns: The length of the extent (minimum of one block)
 400  */
 401
 402 static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
 403 {
 404         const __be64 *end = (start + len);
 405         const __be64 *first = ptr;
 406         u64 d = be64_to_cpu(*ptr);
 407
 408         *eob = 0;
 409         do {
 410                 ptr++;
 411                 if (ptr >= end)
 412                         break;
 413                 if (limit && --limit == 0)
 414                         break;
 415                 if (d)
 416                         d++;
 417         } while(be64_to_cpu(*ptr) == d);
 418         if (ptr >= end)
 419                 *eob = 1;
 420         return (ptr - first);
 421 }
 422
 423 static inline void bmap_lock(struct gfs2_inode *ip, int create)
 424 {
 425         if (create)
 426                 down_write(&ip->i_rw_mutex);
 427         else
 428                 down_read(&ip->i_rw_mutex);
 429 }
 430
 431 static inline void bmap_unlock(struct gfs2_inode *ip, int create)
 432 {
 433         if (create)
 434                 up_write(&ip->i_rw_mutex);
 435         else
 436                 up_read(&ip->i_rw_mutex);
 437 }
 438
 439 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 440                                          struct gfs2_glock *gl, unsigned int i,
 441                                          unsigned offset, u64 bn)
 442 {
 443         __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
 444                        ((i > 1) ? sizeof(struct gfs2_meta_header) :
 445                                  sizeof(struct gfs2_dinode)));
 446         BUG_ON(i < 1);
 447         BUG_ON(mp->mp_bh[i] != NULL);
 448         mp->mp_bh[i] = gfs2_meta_new(gl, bn);
 449         gfs2_trans_add_meta(gl, mp->mp_bh[i]);
 450         gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 451         gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
 452         ptr += offset;
 453         *ptr = cpu_to_be64(bn);
 454         return ptr;
 455 }
 456
 457 enum alloc_state {
 458         ALLOC_DATA = 0,
 459         ALLOC_GROW_DEPTH = 1,
 460         ALLOC_GROW_HEIGHT = 2,
 461         /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 462 };
 463
 464 /**
 465  * gfs2_iomap_alloc - Build a metadata tree of the requested height
 466  * @inode: The GFS2 inode
 467  * @iomap: The mapping; on entry its length bounds the number of data
 468  *         blocks to allocate, on success addr, length and IOMAP_F_NEW
 469  *         are filled in for the newly allocated extent
 470  * @flags: iomap flags; with IOMAP_ZERO the new data blocks are zeroed
 471  *         via sb_issue_zeroout()
 472  * @mp: The metapath, with proper height information calculated; the
 473  *      dinode buffer must already be in mp_bh[0]
 474  *
 475  * In this routine we may have to alloc:
 476  *   i) Indirect blocks to grow the metadata tree height
 477  *  ii) Indirect blocks to fill in lower part of the metadata tree
 478  * iii) Data blocks
 479  *
 480  * The function is in two parts. The first part works out the total
 481  * number of blocks which we need. The second part does the actual
 482  * allocation asking for an extent at a time (if enough contiguous free
 483  * blocks are available, there will only be one request per bmap call)
 484  * and uses the state machine to initialise the blocks in order.
 485  *
 486  * Returns: 0 on success, or the error from gfs2_alloc_blocks()
 487  */
 488
 489 static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 490                             unsigned flags, struct metapath *mp)
 491 {
 492         struct gfs2_inode *ip = GFS2_I(inode);
 493         struct gfs2_sbd *sdp = GFS2_SB(inode);
 494         struct super_block *sb = sdp->sd_vfs;
 495         struct buffer_head *dibh = mp->mp_bh[0];
 496         u64 bn;
 497         unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 498         unsigned dblks = 0;
 499         unsigned ptrs_per_blk;
 500         const unsigned end_of_metadata = mp->mp_fheight - 1; /* height of the bottom pointer block */
 501         int ret;
 502         enum alloc_state state;
 503         __be64 *ptr;
 504         __be64 zero_bn = 0;
 505         size_t maxlen = iomap->length >> inode->i_blkbits; /* requested length in blocks */
 506
 507         BUG_ON(mp->mp_aheight < 1);
 508         BUG_ON(dibh == NULL);
 509
 510         gfs2_trans_add_meta(ip->i_gl, dibh);
 511
             /* First part: work out how many data (dblks) and indirect (iblks) blocks are needed */
 512         if (mp->mp_fheight == mp->mp_aheight) {
 513                 struct buffer_head *bh;
 514                 int eob;
 515
 516                 /* Bottom indirect block exists, find unalloced extent size */
 517                 ptr = metapointer(end_of_metadata, mp);
 518                 bh = mp->mp_bh[end_of_metadata];
 519                 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
 520                                            maxlen, &eob);
 521                 BUG_ON(dblks < 1);
 522                 state = ALLOC_DATA;
 523         } else {
 524                 /* Need to allocate indirect blocks */
 525                 ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
 526                         sdp->sd_diptrs;
                     /* Data extent is capped at the end of the bottom pointer block */
 527                 dblks = min(maxlen, (size_t)(ptrs_per_blk -
 528                                              mp->mp_list[end_of_metadata]));
 529                 if (mp->mp_fheight == ip->i_height) {
 530                         /* Writing into existing tree, extend tree down */
 531                         iblks = mp->mp_fheight - mp->mp_aheight;
 532                         state = ALLOC_GROW_DEPTH;
 533                 } else {
 534                         /* Building up tree height */
 535                         state = ALLOC_GROW_HEIGHT;
 536                         iblks = mp->mp_fheight - ip->i_height;
 537                         branch_start = metapath_branch_start(mp);
 538                         iblks += (mp->mp_fheight - branch_start);
 539                 }
 540         }
 541
 542         /* start of the second part of the function (state machine) */
 543
 544         blks = dblks + iblks;
 545         i = mp->mp_aheight;
 546         do {
 547                 int error;
 548                 n = blks - alloced; /* ask for everything still missing; n returns what we got */
 549                 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
 550                 if (error)
 551                         return error;
 552                 alloced += n;
 553                 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 554                         gfs2_trans_add_unrevoke(sdp, bn, n);
 555                 switch (state) {
 556                 /* Growing height of tree */
 557                 case ALLOC_GROW_HEIGHT:
 558                         if (i == 1) {
                                     /* Remember the dinode's first pointer; it is restored below */
 559                                 ptr = (__be64 *)(dibh->b_data +
 560                                                  sizeof(struct gfs2_dinode));
 561                                 zero_bn = *ptr;
 562                         }
 563                         for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
 564                              i++, n--)
 565                                 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
 566                         if (i - 1 == mp->mp_fheight - ip->i_height) {
                                     /*
                                      * All new top levels exist: move the dinode's old
                                      * pointers into the lowest new indirect block, leave
                                      * only the first pointer in the dinode, and drop the
                                      * buffers from branch_start down so that the
                                      * GROW_DEPTH step can rebuild that part of the path.
                                      */
 567                                 i--;
 568                                 gfs2_buffer_copy_tail(mp->mp_bh[i],
 569                                                 sizeof(struct gfs2_meta_header),
 570                                                 dibh, sizeof(struct gfs2_dinode));
 571                                 gfs2_buffer_clear_tail(dibh,
 572                                                 sizeof(struct gfs2_dinode) +
 573                                                 sizeof(__be64));
 574                                 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
 575                                         sizeof(struct gfs2_meta_header));
 576                                 *ptr = zero_bn;
 577                                 state = ALLOC_GROW_DEPTH;
 578                                 for(i = branch_start; i < mp->mp_fheight; i++) {
 579                                         if (mp->mp_bh[i] == NULL)
 580                                                 break;
 581                                         brelse(mp->mp_bh[i]);
 582                                         mp->mp_bh[i] = NULL;
 583                                 }
 584                                 i = branch_start;
 585                         }
 586                         if (n == 0)
 587                                 break;
 588                 /* Fall through (blocks left over): branching from existing tree */
 589                 case ALLOC_GROW_DEPTH:
 590                         if (i > 1 && i < mp->mp_fheight)
 591                                 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
 592                         for (; i < mp->mp_fheight && n > 0; i++, n--)
 593                                 gfs2_indirect_init(mp, ip->i_gl, i,
 594                                                    mp->mp_list[i-1], bn++);
 595                         if (i == mp->mp_fheight)
 596                                 state = ALLOC_DATA;
 597                         if (n == 0)
 598                                 break;
 599                 /* Fall through (blocks left over): tree complete, adding data blocks */
 600                 case ALLOC_DATA:
 601                         BUG_ON(n > dblks);
 602                         BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
 603                         gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
 604                         dblks = n; /* the extent actually obtained may be shorter than planned */
 605                         ptr = metapointer(end_of_metadata, mp);
 606                         iomap->addr = bn << inode->i_blkbits; /* this also ends the do/while loop */
 607                         iomap->flags |= IOMAP_F_NEW;
 608                         while (n-- > 0)
 609                                 *ptr++ = cpu_to_be64(bn++);
 610                         if (flags & IOMAP_ZERO) {
 611                                 ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
 612                                                        dblks, GFP_NOFS);
 613                                 if (ret) {
                                             /* A zeroing failure is logged but not returned to the caller */
 614                                         fs_err(sdp,
 615                                                "Failed to zero data buffers\n");
 616                                         flags &= ~IOMAP_ZERO; /* NOTE(review): flags does not appear to be read again after this */
 617                                 }
 618                         }
 619                         break;
 620                 }
 621         } while (iomap->addr == IOMAP_NULL_ADDR); /* repeat until the data step has set iomap->addr */
 622
 623         iomap->length = (u64)dblks << inode->i_blkbits;
 624         ip->i_height = mp->mp_fheight;
 625         gfs2_add_inode_blocks(&ip->i_inode, alloced);
 626         gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
 627         return 0;
 628 }
 629
 630 /**
 631  * hole_size - figure out the size of a hole
 632  * @inode: The inode
 633  * @lblock: The logical starting block number
 634  * @mp: The metapath
 635  *
 636  * Returns: The hole size in bytes
 637  *
 638  */
 639 static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
 640 {
 641         struct gfs2_inode *ip = GFS2_I(inode);
 642         struct gfs2_sbd *sdp = GFS2_SB(inode);
 643         struct metapath mp_eof;
 644         u64 factor = 1;
 645         int hgt;
 646         u64 holesz = 0;
 647         const __be64 *first, *end, *ptr;
 648         const struct buffer_head *bh;
 649         u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
 650         int zeroptrs;
 651         bool done = false;
 652
 653         /* Get another metapath, to the very last byte */
 654         find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
 655         for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
 656                 bh = mp->mp_bh[hgt];
 657                 if (bh) {
 658                         zeroptrs = 0;
 659                         first = metapointer(hgt, mp);
 660                         end = (const __be64 *)(bh->b_data + bh->b_size);
 661
 662                         for (ptr = first; ptr < end; ptr++) {
 663                                 if (*ptr) {
 664                                         done = true;
 665                                         break;
 666                                 } else {
 667                                         zeroptrs++;
 668                                 }
 669                         }
 670                 } else {
 671                         zeroptrs = sdp->sd_inptrs;
 672                 }
 673                 if (factor * zeroptrs >= lblock_stop - lblock + 1) {
 674                         holesz = lblock_stop - lblock + 1;
 675                         break;
 676                 }
 677                 holesz += factor * zeroptrs;
 678
 679                 factor *= sdp->sd_inptrs;
 680                 if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
 681                         (mp->mp_list[hgt - 1])++;
 682         }
 683         return holesz << inode->i_blkbits;
 684 }
 685
 686 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
 687 {
 688         struct gfs2_inode *ip = GFS2_I(inode);
 689
 690         iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 691                       sizeof(struct gfs2_dinode);
 692         iomap->offset = 0;
 693         iomap->length = i_size_read(inode);
 694         iomap->type = IOMAP_MAPPED;
 695         iomap->flags = IOMAP_F_DATA_INLINE;
 696 }
 697
 698 /**
 699  * gfs2_iomap_begin - Map blocks from an inode to disk blocks
 700  * @inode: The inode
 701  * @pos: Starting position in bytes
 702  * @length: Length to map, in bytes (must not be zero)
 703  * @flags: iomap flags; IOMAP_WRITE allows allocation, IOMAP_REPORT asks
      *         for hole sizes, and a stuffed inode must be mapped with one
      *         of the two
 704  * @iomap: The iomap structure
 705  *
      * On success @iomap describes the extent starting at the block that
      * contains @pos: either a mapped extent, or a hole.  With IOMAP_WRITE an
      * unmapped block is allocated via gfs2_iomap_alloc().  The inode's
      * i_rw_mutex is held for the duration of the lookup, for writing if
      * IOMAP_WRITE is set and for reading otherwise.
      *
 706  * Returns: errno (-EINVAL for a zero @length, -ENOENT when IOMAP_REPORT is
      *          used with @pos at or beyond the end of the file)
 707  */
 708 int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 709                      unsigned flags, struct iomap *iomap)
 710 {
 711         struct gfs2_inode *ip = GFS2_I(inode);
 712         struct gfs2_sbd *sdp = GFS2_SB(inode);
 713         struct metapath mp = { .mp_aheight = 1, };
 714         unsigned int factor = sdp->sd_sb.sb_bsize;
 715         const u64 *arr = sdp->sd_heightsize;
 716         __be64 *ptr;
 717         sector_t lblock;
 718         sector_t lend;
 719         int ret = 0;
 720         int eob;
 721         unsigned int len;
 722         struct buffer_head *bh;
 723         u8 height;
 724
 725         trace_gfs2_iomap_start(ip, pos, length, flags);
 726         if (!length) {
 727                 ret = -EINVAL;
 728                 goto out;
 729         }
 730
 731         if (gfs2_is_stuffed(ip)) {
 732                 if (flags & IOMAP_REPORT) {
                             /* Report the inline data; no lock or metadata walk needed */
 733                         gfs2_stuffed_iomap(inode, iomap);
 734                         if (pos >= iomap->length)
 735                                 ret = -ENOENT;
 736                         goto out;
 737                 }
 738                 BUG_ON(!(flags & IOMAP_WRITE));
 739         }
 740
 741         lblock = pos >> inode->i_blkbits;
             /* One past the last block touched by [pos, pos + length) */
 742         lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
 743
             /* Start out as a hole covering the whole request; refined below */
 744         iomap->offset = lblock << inode->i_blkbits;
 745         iomap->addr = IOMAP_NULL_ADDR;
 746         iomap->type = IOMAP_HOLE;
 747         iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
 748         iomap->flags = IOMAP_F_MERGED;
 749         bmap_lock(ip, flags & IOMAP_WRITE);
 750
 751         /*
 752          * Directory data blocks have a struct gfs2_meta_header header, so the
 753          * remaining size is smaller than the filesystem block size.  Logical
 754          * block numbers for directories are in units of this remaining size!
 755          */
 756         if (gfs2_is_dir(ip)) {
 757                 factor = sdp->sd_jbsize;
 758                 arr = sdp->sd_jheightsize;
 759         }
 760
 761         ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
 762         if (ret)
 763                 goto out_release;
 764
             /* Find the smallest tree height whose capacity (arr[height]) covers lblock */
 765         height = ip->i_height;
 766         while ((lblock + 1) * factor > arr[height])
 767                 height++;
 768         find_metapath(sdp, lblock, &mp, height);
             /* Tree too short for lblock, or data still stuffed: nothing to look up */
 769         if (height > ip->i_height || gfs2_is_stuffed(ip))
 770                 goto do_alloc;
 771
 772         ret = lookup_metapath(ip, &mp);
 773         if (ret)
 774                 goto out_release;
 775
             /* The walk stopped early at an unallocated indirect pointer */
 776         if (mp.mp_aheight != ip->i_height)
 777                 goto do_alloc;
 778
 779         ptr = metapointer(ip->i_height - 1, &mp);
 780         if (*ptr == 0)
 781                 goto do_alloc;
 782
 783         iomap->type = IOMAP_MAPPED;
 784         iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
 785
 786         bh = mp.mp_bh[ip->i_height - 1];
 787         len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
 788         if (eob)
 789                 iomap->flags |= IOMAP_F_BOUNDARY; /* extent runs to the end of its pointer block */
 790         iomap->length = (u64)len << inode->i_blkbits;
 791
 792 out_release:
 793         release_metapath(&mp);
 794         bmap_unlock(ip, flags & IOMAP_WRITE);
 795 out:
 796         trace_gfs2_iomap_end(ip, iomap, ret);
 797         return ret;
 798
             /*
              * Reached only by goto when lblock is not mapped; always jumps back
              * to out_release.  Without IOMAP_WRITE or IOMAP_REPORT the iomap is
              * left as the hole set up above.
              */
 799 do_alloc:
 800         if (flags & IOMAP_WRITE) {
 801                 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
 802         } else if (flags & IOMAP_REPORT) {
 803                 loff_t size = i_size_read(inode);
 804                 if (pos >= size)
 805                         ret = -ENOENT;
 806                 else if (height <= ip->i_height)
 807                         iomap->length = hole_size(inode, lblock, &mp);
 808                 else
 809                         iomap->length = size - pos; /* beyond the current tree: hole up to EOF */
 810         }
 811         goto out_release;
 812 }
 813
 814 /**
 815  * gfs2_block_map - Map a block from an inode to a disk block
 816  * @inode: The inode
 817  * @lblock: The logical block number
 818  * @bh_map: The bh to be mapped
 819  * @create: True if its ok to alloc blocks to satify the request
 820  *
 821  * Sets buffer_mapped() if successful, sets buffer_boundary() if a
 822  * read of metadata will be required before the next block can be
 823  * mapped. Sets buffer_new() if new blocks were allocated.
 824  *
 825  * Returns: errno
 826  */
 827
 828 int gfs2_block_map(struct inode *inode, sector_t lblock,
 829                    struct buffer_head *bh_map, int create)
 830 {
 831         struct gfs2_inode *ip = GFS2_I(inode);
 832         struct iomap iomap;
 833         int ret, flags = 0;
 834
 835         clear_buffer_mapped(bh_map);
 836         clear_buffer_new(bh_map);
 837         clear_buffer_boundary(bh_map);
 838         trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 839
 840         if (create)
 841                 flags |= IOMAP_WRITE;
 842         if (buffer_zeronew(bh_map))
 843                 flags |= IOMAP_ZERO;
 844         ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
 845                                bh_map->b_size, flags, &iomap);
 846         if (ret) {
 847                 if (!create && ret == -ENOENT) {
 848                         /* Return unmapped buffer beyond the end of file.  */
 849                         ret = 0;
 850                 }
 851                 goto out;
 852         }
 853
 854         if (iomap.length > bh_map->b_size) {
 855                 iomap.length = bh_map->b_size;
 856                 iomap.flags &= ~IOMAP_F_BOUNDARY;
 857         }
 858         if (iomap.addr != IOMAP_NULL_ADDR)
 859                 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
 860         bh_map->b_size = iomap.length;
 861         if (iomap.flags & IOMAP_F_BOUNDARY)
 862                 set_buffer_boundary(bh_map);
 863         if (iomap.flags & IOMAP_F_NEW)
 864                 set_buffer_new(bh_map);
 865
 866 out:
 867         trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 868         return ret;
 869 }
 870
 871 /*
 872  * Deprecated: do not use in new code
 873  */
 874 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 875 {
 876         struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 877         int ret;
 878         int create = *new;
 879
 880         BUG_ON(!extlen);
 881         BUG_ON(!dblock);
 882         BUG_ON(!new);
 883
 884         bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
 885         ret = gfs2_block_map(inode, lblock, &bh, create);
 886         *extlen = bh.b_size >> inode->i_blkbits;
 887         *dblock = bh.b_blocknr;
 888         if (buffer_new(&bh))
 889                 *new = 1;
 890         else
 891                 *new = 0;
 892         return ret;
 893 }
 894
 895 /**
 896  * gfs2_block_zero_range - Deal with zeroing out data
 897  *
 898  * This is partly borrowed from ext3.
 899  */
 900 static int gfs2_block_zero_range(struct inode *inode, loff_t from,
 901                                  unsigned int length)
 902 {
 903         struct address_space *mapping = inode->i_mapping;
 904         struct gfs2_inode *ip = GFS2_I(inode);
 905         unsigned long index = from >> PAGE_SHIFT;
 906         unsigned offset = from & (PAGE_SIZE-1);
 907         unsigned blocksize, iblock, pos;
 908         struct buffer_head *bh;
 909         struct page *page;
 910         int err;
 911
 912         page = find_or_create_page(mapping, index, GFP_NOFS);
 913         if (!page)
 914                 return 0;
 915
 916         blocksize = inode->i_sb->s_blocksize;
 917         iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 918
 919         if (!page_has_buffers(page))
 920                 create_empty_buffers(page, blocksize, 0);
 921
 922         /* Find the buffer that contains "offset" */
 923         bh = page_buffers(page);
 924         pos = blocksize;
 925         while (offset >= pos) {
 926                 bh = bh->b_this_page;
 927                 iblock++;
 928                 pos += blocksize;
 929         }
 930
 931         err = 0;
 932
 933         if (!buffer_mapped(bh)) {
 934                 gfs2_block_map(inode, iblock, bh, 0);
 935                 /* unmapped? It's a hole - nothing to do */
 936                 if (!buffer_mapped(bh))
 937                         goto unlock;
 938         }
 939
 940         /* Ok, it's mapped. Make sure it's up-to-date */
 941         if (PageUptodate(page))
 942                 set_buffer_uptodate(bh);
 943
 944         if (!buffer_uptodate(bh)) {
 945                 err = -EIO;
 946                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 947                 wait_on_buffer(bh);
 948                 /* Uhhuh. Read error. Complain and punt. */
 949                 if (!buffer_uptodate(bh))
 950                         goto unlock;
 951                 err = 0;
 952         }
 953
 954         if (!gfs2_is_writeback(ip))
 955                 gfs2_trans_add_data(ip->i_gl, bh);
 956
 957         zero_user(page, offset, length);
 958         mark_buffer_dirty(bh);
 959 unlock:
 960         unlock_page(page);
 961         put_page(page);
 962         return err;
 963 }
 964
 965 #define GFS2_JTRUNC_REVOKES 8192
 966
 967 /**
 968  * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 969  * @inode: The inode being truncated
 970  * @oldsize: The original (larger) size
 971  * @newsize: The new smaller size
 972  *
 973  * With jdata files, we have to journal a revoke for each block which is
 974  * truncated. As a result, we need to split this into separate transactions
 975  * if the number of pages being truncated gets too large.
 976  */
 977
 978 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
 979 {
 980         struct gfs2_sbd *sdp = GFS2_SB(inode);
 981         u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
 982         u64 chunk;
 983         int error;
 984
 985         while (oldsize != newsize) {
 986                 struct gfs2_trans *tr;
 987                 unsigned int offs;
 988
 989                 chunk = oldsize - newsize;
 990                 if (chunk > max_chunk)
 991                         chunk = max_chunk;
 992
 993                 offs = oldsize & ~PAGE_MASK;
 994                 if (offs && chunk > PAGE_SIZE)
 995                         chunk = offs + ((chunk - offs) & PAGE_MASK);
 996
 997                 truncate_pagecache(inode, oldsize - chunk);
 998                 oldsize -= chunk;
 999
1000                 tr = current->journal_info;
1001                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1002                         continue;
1003
1004                 gfs2_trans_end(sdp);
1005                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1006                 if (error)
1007                         return error;
1008         }
1009
1010         return 0;
1011 }
1012
1013 static int trunc_start(struct inode *inode, u64 newsize)
1014 {
1015         struct gfs2_inode *ip = GFS2_I(inode);
1016         struct gfs2_sbd *sdp = GFS2_SB(inode);
1017         struct buffer_head *dibh = NULL;
1018         int journaled = gfs2_is_jdata(ip);
1019         u64 oldsize = inode->i_size;
1020         int error;
1021
1022         if (journaled)
1023                 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1024         else
1025                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1026         if (error)
1027                 return error;
1028
1029         error = gfs2_meta_inode_buffer(ip, &dibh);
1030         if (error)
1031                 goto out;
1032
1033         gfs2_trans_add_meta(ip->i_gl, dibh);
1034
1035         if (gfs2_is_stuffed(ip)) {
1036                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1037         } else {
1038                 unsigned int blocksize = i_blocksize(inode);
1039                 unsigned int offs = newsize & (blocksize - 1);
1040                 if (offs) {
1041                         error = gfs2_block_zero_range(inode, newsize,
1042                                                       blocksize - offs);
1043                         if (error)
1044                                 goto out;
1045                 }
1046                 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1047         }
1048
1049         i_size_write(inode, newsize);
1050         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1051         gfs2_dinode_out(ip, dibh->b_data);
1052
1053         if (journaled)
1054                 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1055         else
1056                 truncate_pagecache(inode, newsize);
1057
1058 out:
1059         brelse(dibh);
1060         if (current->journal_info)
1061                 gfs2_trans_end(sdp);
1062         return error;
1063 }
1064
1065 /**
1066  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1067  * @ip: inode
1068  * @rg_gh: holder of resource group glock
1069  * @bh: buffer head to sweep
1070  * @start: starting point in bh
1071  * @end: end point in bh
1072  * @meta: true if bh points to metadata (rather than data)
1073  * @btotal: place to keep count of total blocks freed
1074  *
1075  * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1076  * free, and free them all. However, we do it one rgrp at a time. If this
1077  * block has references to multiple rgrps, we break it into individual
1078  * transactions. This allows other processes to use the rgrps while we're
1079  * focused on a single one, for better concurrency / performance.
1080  * At every transaction boundary, we rewrite the inode into the journal.
1081  * That way the bitmaps are kept consistent with the inode and we can recover
1082  * if we're interrupted by power-outages.
1083  *
1084  * Returns: 0, or return code if an error occurred.
1085  *          *btotal has the total number of blocks freed
1086  */
1087 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1088                               struct buffer_head *bh, __be64 *start, __be64 *end,
1089                               bool meta, u32 *btotal)
1090 {
1091         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1092         struct gfs2_rgrpd *rgd;
1093         struct gfs2_trans *tr;
1094         __be64 *p;
1095         int blks_outside_rgrp;
1096         u64 bn, bstart, isize_blks;
1097         s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1098         int ret = 0;
1099         bool buf_in_tr = false; /* buffer was added to transaction */
1100
1101 more_rgrps:
1102         rgd = NULL;
1103         if (gfs2_holder_initialized(rd_gh)) {
1104                 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1105                 gfs2_assert_withdraw(sdp,
1106                              gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1107         }
1108         blks_outside_rgrp = 0;
1109         bstart = 0;
1110         blen = 0;
1111
1112         for (p = start; p < end; p++) {
1113                 if (!*p)
1114                         continue;
1115                 bn = be64_to_cpu(*p);
1116
1117                 if (rgd) {
1118                         if (!rgrp_contains_block(rgd, bn)) {
1119                                 blks_outside_rgrp++;
1120                                 continue;
1121                         }
1122                 } else {
1123                         rgd = gfs2_blk2rgrpd(sdp, bn, true);
1124                         if (unlikely(!rgd)) {
1125                                 ret = -EIO;
1126                                 goto out;
1127                         }
1128                         ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1129                                                  0, rd_gh);
1130                         if (ret)
1131                                 goto out;
1132
1133                         /* Must be done with the rgrp glock held: */
1134                         if (gfs2_rs_active(&ip->i_res) &&
1135                             rgd == ip->i_res.rs_rbm.rgd)
1136                                 gfs2_rs_deltree(&ip->i_res);
1137                 }
1138
1139                 /* The size of our transactions will be unknown until we
1140                    actually process all the metadata blocks that relate to
1141                    the rgrp. So we estimate. We know it can't be more than
1142                    the dinode's i_blocks and we don't want to exceed the
1143                    journal flush threshold, sd_log_thresh2. */
1144                 if (current->journal_info == NULL) {
1145                         unsigned int jblocks_rqsted, revokes;
1146
1147                         jblocks_rqsted = rgd->rd_length + RES_DINODE +
1148                                 RES_INDIRECT;
1149                         isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1150                         if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1151                                 jblocks_rqsted +=
1152                                         atomic_read(&sdp->sd_log_thresh2);
1153                         else
1154                                 jblocks_rqsted += isize_blks;
1155                         revokes = jblocks_rqsted;
1156                         if (meta)
1157                                 revokes += end - start;
1158                         else if (ip->i_depth)
1159                                 revokes += sdp->sd_inptrs;
1160                         ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1161                         if (ret)
1162                                 goto out_unlock;
1163                         down_write(&ip->i_rw_mutex);
1164                 }
1165                 /* check if we will exceed the transaction blocks requested */
1166                 tr = current->journal_info;
1167                 if (tr->tr_num_buf_new + RES_STATFS +
1168                     RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1169                         /* We set blks_outside_rgrp to ensure the loop will
1170                            be repeated for the same rgrp, but with a new
1171                            transaction. */
1172                         blks_outside_rgrp++;
1173                         /* This next part is tricky. If the buffer was added
1174                            to the transaction, we've already set some block
1175                            pointers to 0, so we better follow through and free
1176                            them, or we will introduce corruption (so break).
1177                            This may be impossible, or at least rare, but I
1178                            decided to cover the case regardless.
1179
1180                            If the buffer was not added to the transaction
1181                            (this call), doing so would exceed our transaction
1182                            size, so we need to end the transaction and start a
1183                            new one (so goto). */
1184
1185                         if (buf_in_tr)
1186                                 break;
1187                         goto out_unlock;
1188                 }
1189
1190                 gfs2_trans_add_meta(ip->i_gl, bh);
1191                 buf_in_tr = true;
1192                 *p = 0;
1193                 if (bstart + blen == bn) {
1194                         blen++;
1195                         continue;
1196                 }
1197                 if (bstart) {
1198                         __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1199                         (*btotal) += blen;
1200                         gfs2_add_inode_blocks(&ip->i_inode, -blen);
1201                 }
1202                 bstart = bn;
1203                 blen = 1;
1204         }
1205         if (bstart) {
1206                 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1207                 (*btotal) += blen;
1208                 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1209         }
1210 out_unlock:
1211         if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1212                                             outside the rgrp we just processed,
1213                                             do it all over again. */
1214                 if (current->journal_info) {
1215                         struct buffer_head *dibh;
1216
1217                         ret = gfs2_meta_inode_buffer(ip, &dibh);
1218                         if (ret)
1219                                 goto out;
1220
1221                         /* Every transaction boundary, we rewrite the dinode
1222                            to keep its di_blocks current in case of failure. */
1223                         ip->i_inode.i_mtime = ip->i_inode.i_ctime =
1224                                 current_time(&ip->i_inode);
1225                         gfs2_trans_add_meta(ip->i_gl, dibh);
1226                         gfs2_dinode_out(ip, dibh->b_data);
1227                         brelse(dibh);
1228                         up_write(&ip->i_rw_mutex);
1229                         gfs2_trans_end(sdp);
1230                 }
1231                 gfs2_glock_dq_uninit(rd_gh);
1232                 cond_resched();
1233                 goto more_rgrps;
1234         }
1235 out:
1236         return ret;
1237 }
1238
1239 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1240 {
1241         if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1242                 return false;
1243         return true;
1244 }
1245
1246 /**
1247  * find_nonnull_ptr - find a non-null pointer given a metapath and height
1248  * @mp: starting metapath
1249  * @h: desired height to search
1250  *
1251  * Assumes the metapath is valid (with buffers) out to height h.
1252  * Returns: true if a non-null pointer was found in the metapath buffer
1253  *          false if all remaining pointers are NULL in the buffer
1254  */
1255 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1256                              unsigned int h,
1257                              __u16 *end_list, unsigned int end_aligned)
1258 {
1259         struct buffer_head *bh = mp->mp_bh[h];
1260         __be64 *first, *ptr, *end;
1261
1262         first = metaptr1(h, mp);
1263         ptr = first + mp->mp_list[h];
1264         end = (__be64 *)(bh->b_data + bh->b_size);
1265         if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1266                 bool keep_end = h < end_aligned;
1267                 end = first + end_list[h] + keep_end;
1268         }
1269
1270         while (ptr < end) {
1271                 if (*ptr) { /* if we have a non-null pointer */
1272                         mp->mp_list[h] = ptr - first;
1273                         h++;
1274                         if (h < GFS2_MAX_META_HEIGHT)
1275                                 mp->mp_list[h] = 0;
1276                         return true;
1277                 }
1278                 ptr++;
1279         }
1280         return false;
1281 }
1282
/* States of the deallocation loop in punch_hole(). */
enum dealloc_states {
        /* Strip a metapath with all buffers read in */
        DEALLOC_MP_FULL,
        /* lower the metapath strip height */
        DEALLOC_MP_LOWER,
        /* Fill in the metapath to the given height. */
        DEALLOC_FILL_MP,
        /* process complete */
        DEALLOC_DONE,
};
1289
1290 static inline void
1291 metapointer_range(struct metapath *mp, int height,
1292                   __u16 *start_list, unsigned int start_aligned,
1293                   __u16 *end_list, unsigned int end_aligned,
1294                   __be64 **start, __be64 **end)
1295 {
1296         struct buffer_head *bh = mp->mp_bh[height];
1297         __be64 *first;
1298
1299         first = metaptr1(height, mp);
1300         *start = first;
1301         if (mp_eq_to_hgt(mp, start_list, height)) {
1302                 bool keep_start = height < start_aligned;
1303                 *start = first + start_list[height] + keep_start;
1304         }
1305         *end = (__be64 *)(bh->b_data + bh->b_size);
1306         if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1307                 bool keep_end = height < end_aligned;
1308                 *end = first + end_list[height] + keep_end;
1309         }
1310 }
1311
1312 static inline bool walk_done(struct gfs2_sbd *sdp,
1313                              struct metapath *mp, int height,
1314                              __u16 *end_list, unsigned int end_aligned)
1315 {
1316         __u16 end;
1317
1318         if (end_list) {
1319                 bool keep_end = height < end_aligned;
1320                 if (!mp_eq_to_hgt(mp, end_list, height))
1321                         return false;
1322                 end = end_list[height] + keep_end;
1323         } else
1324                 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1325         return mp->mp_list[height] >= end;
1326 }
1327
1328 /**
1329  * punch_hole - deallocate blocks in a file
1330  * @ip: inode to truncate
1331  * @offset: the start of the hole
1332  * @length: the size of the hole (or 0 for truncate)
1333  *
1334  * Punch a hole into a file or truncate a file at a given position.  This
1335  * function operates in whole blocks (@offset and @length are rounded
1336  * accordingly); partially filled blocks must be cleared otherwise.
1337  *
1338  * This function works from the bottom up, and from the right to the left. In
1339  * other words, it strips off the highest layer (data) before stripping any of
1340  * the metadata. Doing it this way is best in case the operation is interrupted
1341  * by power failure, etc.  The dinode is rewritten in every transaction to
1342  * guarantee integrity.
1343  */
1344 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1345 {
1346         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1347         struct metapath mp = {};
1348         struct buffer_head *dibh, *bh;
1349         struct gfs2_holder rd_gh;
1350         unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1351         u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1352         __u16 start_list[GFS2_MAX_META_HEIGHT];
1353         __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1354         unsigned int start_aligned, uninitialized_var(end_aligned);
1355         unsigned int strip_h = ip->i_height - 1;
1356         u32 btotal = 0;
1357         int ret, state;
1358         int mp_h; /* metapath buffers are read in to this height */
1359         u64 prev_bnr = 0;
1360         __be64 *start, *end;
1361
1362         /*
1363          * The start position of the hole is defined by lblock, start_list, and
1364          * start_aligned.  The end position of the hole is defined by lend,
1365          * end_list, and end_aligned.
1366          *
1367          * start_aligned and end_aligned define down to which height the start
1368          * and end positions are aligned to the metadata tree (i.e., the
1369          * position is a multiple of the metadata granularity at the height
1370          * above).  This determines at which heights additional meta pointers
1371          * needs to be preserved for the remaining data.
1372          */
1373
1374         if (length) {
1375                 u64 maxsize = sdp->sd_heightsize[ip->i_height];
1376                 u64 end_offset = offset + length;
1377                 u64 lend;
1378
1379                 /*
1380                  * Clip the end at the maximum file size for the given height:
1381                  * that's how far the metadata goes; files bigger than that
1382                  * will have additional layers of indirection.
1383                  */
1384                 if (end_offset > maxsize)
1385                         end_offset = maxsize;
1386                 lend = end_offset >> bsize_shift;
1387
1388                 if (lblock >= lend)
1389                         return 0;
1390
1391                 find_metapath(sdp, lend, &mp, ip->i_height);
1392                 end_list = __end_list;
1393                 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1394
1395                 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1396                         if (end_list[mp_h])
1397                                 break;
1398                 }
1399                 end_aligned = mp_h;
1400         }
1401
1402         find_metapath(sdp, lblock, &mp, ip->i_height);
1403         memcpy(start_list, mp.mp_list, sizeof(start_list));
1404
1405         for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1406                 if (start_list[mp_h])
1407                         break;
1408         }
1409         start_aligned = mp_h;
1410
1411         ret = gfs2_meta_inode_buffer(ip, &dibh);
1412         if (ret)
1413                 return ret;
1414
1415         mp.mp_bh[0] = dibh;
1416         ret = lookup_metapath(ip, &mp);
1417         if (ret)
1418                 goto out_metapath;
1419
1420         /* issue read-ahead on metadata */
1421         for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1422                 metapointer_range(&mp, mp_h, start_list, start_aligned,
1423                                   end_list, end_aligned, &start, &end);
1424                 gfs2_metapath_ra(ip->i_gl, start, end);
1425         }
1426
1427         if (mp.mp_aheight == ip->i_height)
1428                 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1429         else
1430                 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1431
1432         ret = gfs2_rindex_update(sdp);
1433         if (ret)
1434                 goto out_metapath;
1435
1436         ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1437         if (ret)
1438                 goto out_metapath;
1439         gfs2_holder_mark_uninitialized(&rd_gh);
1440
1441         mp_h = strip_h;
1442
1443         while (state != DEALLOC_DONE) {
1444                 switch (state) {
1445                 /* Truncate a full metapath at the given strip height.
1446                  * Note that strip_h == mp_h in order to be in this state. */
1447                 case DEALLOC_MP_FULL:
1448                         bh = mp.mp_bh[mp_h];
1449                         gfs2_assert_withdraw(sdp, bh);
1450                         if (gfs2_assert_withdraw(sdp,
1451                                                  prev_bnr != bh->b_blocknr)) {
1452                                 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1453                                        "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1454                                        sdp->sd_fsname,
1455                                        (unsigned long long)ip->i_no_addr,
1456                                        prev_bnr, ip->i_height, strip_h, mp_h);
1457                         }
1458                         prev_bnr = bh->b_blocknr;
1459
1460                         if (gfs2_metatype_check(sdp, bh,
1461                                                 (mp_h ? GFS2_METATYPE_IN :
1462                                                         GFS2_METATYPE_DI))) {
1463                                 ret = -EIO;
1464                                 goto out;
1465                         }
1466
1467                         /*
1468                          * Below, passing end_aligned as 0 gives us the
1469                          * metapointer range excluding the end point: the end
1470                          * point is the first metapath we must not deallocate!
1471                          */
1472
1473                         metapointer_range(&mp, mp_h, start_list, start_aligned,
1474                                           end_list, 0 /* end_aligned */,
1475                                           &start, &end);
1476                         ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1477                                                  start, end,
1478                                                  mp_h != ip->i_height - 1,
1479                                                  &btotal);
1480
1481                         /* If we hit an error or just swept dinode buffer,
1482                            just exit. */
1483                         if (ret || !mp_h) {
1484                                 state = DEALLOC_DONE;
1485                                 break;
1486                         }
1487                         state = DEALLOC_MP_LOWER;
1488                         break;
1489
1490                 /* lower the metapath strip height */
1491                 case DEALLOC_MP_LOWER:
1492                         /* We're done with the current buffer, so release it,
1493                            unless it's the dinode buffer. Then back up to the
1494                            previous pointer. */
1495                         if (mp_h) {
1496                                 brelse(mp.mp_bh[mp_h]);
1497                                 mp.mp_bh[mp_h] = NULL;
1498                         }
1499                         /* If we can't get any lower in height, we've stripped
1500                            off all we can. Next step is to back up and start
1501                            stripping the previous level of metadata. */
1502                         if (mp_h == 0) {
1503                                 strip_h--;
1504                                 memcpy(mp.mp_list, start_list, sizeof(start_list));
1505                                 mp_h = strip_h;
1506                                 state = DEALLOC_FILL_MP;
1507                                 break;
1508                         }
1509                         mp.mp_list[mp_h] = 0;
1510                         mp_h--; /* search one metadata height down */
1511                         mp.mp_list[mp_h]++;
1512                         if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1513                                 break;
1514                         /* Here we've found a part of the metapath that is not
1515                          * allocated. We need to search at that height for the
1516                          * next non-null pointer. */
1517                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1518                                 state = DEALLOC_FILL_MP;
1519                                 mp_h++;
1520                         }
1521                         /* No more non-null pointers at this height. Back up
1522                            to the previous height and try again. */
1523                         break; /* loop around in the same state */
1524
1525                 /* Fill the metapath with buffers to the given height. */
1526                 case DEALLOC_FILL_MP:
1527                         /* Fill the buffers out to the current height. */
1528                         ret = fillup_metapath(ip, &mp, mp_h);
1529                         if (ret < 0)
1530                                 goto out;
1531
1532                         /* issue read-ahead on metadata */
1533                         if (mp.mp_aheight > 1) {
1534                                 for (; ret > 1; ret--) {
1535                                         metapointer_range(&mp, mp.mp_aheight - ret,
1536                                                           start_list, start_aligned,
1537                                                           end_list, end_aligned,
1538                                                           &start, &end);
1539                                         gfs2_metapath_ra(ip->i_gl, start, end);
1540                                 }
1541                         }
1542
1543                         /* If buffers found for the entire strip height */
1544                         if (mp.mp_aheight - 1 == strip_h) {
1545                                 state = DEALLOC_MP_FULL;
1546                                 break;
1547                         }
1548                         if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1549                                 mp_h = mp.mp_aheight - 1;
1550
1551                         /* If we find a non-null block pointer, crawl a bit
1552                            higher up in the metapath and try again, otherwise
1553                            we need to look lower for a new starting point. */
1554                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1555                                 mp_h++;
1556                         else
1557                                 state = DEALLOC_MP_LOWER;
1558                         break;
1559                 }
1560         }
1561
1562         if (btotal) {
1563                 if (current->journal_info == NULL) {
1564                         ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1565                                                RES_QUOTA, 0);
1566                         if (ret)
1567                                 goto out;
1568                         down_write(&ip->i_rw_mutex);
1569                 }
1570                 gfs2_statfs_change(sdp, 0, +btotal, 0);
1571                 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1572                                   ip->i_inode.i_gid);
1573                 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1574                 gfs2_trans_add_meta(ip->i_gl, dibh);
1575                 gfs2_dinode_out(ip, dibh->b_data);
1576                 up_write(&ip->i_rw_mutex);
1577                 gfs2_trans_end(sdp);
1578         }
1579
1580 out:
1581         if (gfs2_holder_initialized(&rd_gh))
1582                 gfs2_glock_dq_uninit(&rd_gh);
1583         if (current->journal_info) {
1584                 up_write(&ip->i_rw_mutex);
1585                 gfs2_trans_end(sdp);
1586                 cond_resched();
1587         }
1588         gfs2_quota_unhold(ip);
1589 out_metapath:
1590         release_metapath(&mp);
1591         return ret;
1592 }
1593
1594 static int trunc_end(struct gfs2_inode *ip)
1595 {
1596         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1597         struct buffer_head *dibh;
1598         int error;
1599
1600         error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1601         if (error)
1602                 return error;
1603
1604         down_write(&ip->i_rw_mutex);
1605
1606         error = gfs2_meta_inode_buffer(ip, &dibh);
1607         if (error)
1608                 goto out;
1609
1610         if (!i_size_read(&ip->i_inode)) {
1611                 ip->i_height = 0;
1612                 ip->i_goal = ip->i_no_addr;
1613                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1614                 gfs2_ordered_del_inode(ip);
1615         }
1616         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1617         ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1618
1619         gfs2_trans_add_meta(ip->i_gl, dibh);
1620         gfs2_dinode_out(ip, dibh->b_data);
1621         brelse(dibh);
1622
1623 out:
1624         up_write(&ip->i_rw_mutex);
1625         gfs2_trans_end(sdp);
1626         return error;
1627 }
1628
1629 /**
1630  * do_shrink - make a file smaller
1631  * @inode: the inode
1632  * @newsize: the size to make the file
1633  *
1634  * Called with an exclusive lock on @inode. The @size must
1635  * be equal to or smaller than the current inode size.
1636  *
1637  * Returns: errno
1638  */
1639
1640 static int do_shrink(struct inode *inode, u64 newsize)
1641 {
1642         struct gfs2_inode *ip = GFS2_I(inode);
1643         int error;
1644
1645         error = trunc_start(inode, newsize);
1646         if (error < 0)
1647                 return error;
1648         if (gfs2_is_stuffed(ip))
1649                 return 0;
1650
1651         error = punch_hole(ip, newsize, 0);
1652         if (error == 0)
1653                 error = trunc_end(ip);
1654
1655         return error;
1656 }
1657
1658 void gfs2_trim_blocks(struct inode *inode)
1659 {
1660         int ret;
1661
1662         ret = do_shrink(inode, inode->i_size);
1663         WARN_ON(ret != 0);
1664 }
1665
1666 /**
1667  * do_grow - Touch and update inode size
1668  * @inode: The inode
1669  * @size: The new size
1670  *
1671  * This function updates the timestamps on the inode and
1672  * may also increase the size of the inode. This function
1673  * must not be called with @size any smaller than the current
1674  * inode size.
1675  *
1676  * Although it is not strictly required to unstuff files here,
1677  * earlier versions of GFS2 have a bug in the stuffed file reading
1678  * code which will result in a buffer overrun if the size is larger
1679  * than the max stuffed file size. In order to prevent this from
1680  * occurring, such files are unstuffed, but in other cases we can
1681  * just update the inode size directly.
1682  *
1683  * Returns: 0 on success, or -ve on error
1684  */
1685
1686 static int do_grow(struct inode *inode, u64 size)
1687 {
1688         struct gfs2_inode *ip = GFS2_I(inode);
1689         struct gfs2_sbd *sdp = GFS2_SB(inode);
1690         struct gfs2_alloc_parms ap = { .target = 1, };
1691         struct buffer_head *dibh;
1692         int error;
1693         int unstuff = 0;
1694
1695         if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
1696                 error = gfs2_quota_lock_check(ip, &ap);
1697                 if (error)
1698                         return error;
1699
1700                 error = gfs2_inplace_reserve(ip, &ap);
1701                 if (error)
1702                         goto do_grow_qunlock;
1703                 unstuff = 1;
1704         }
1705
1706         error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1707                                  (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1708                                   0 : RES_QUOTA), 0);
1709         if (error)
1710                 goto do_grow_release;
1711
1712         if (unstuff) {
1713                 error = gfs2_unstuff_dinode(ip, NULL);
1714                 if (error)
1715                         goto do_end_trans;
1716         }
1717
1718         error = gfs2_meta_inode_buffer(ip, &dibh);
1719         if (error)
1720                 goto do_end_trans;
1721
1722         i_size_write(inode, size);
1723         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1724         gfs2_trans_add_meta(ip->i_gl, dibh);
1725         gfs2_dinode_out(ip, dibh->b_data);
1726         brelse(dibh);
1727
1728 do_end_trans:
1729         gfs2_trans_end(sdp);
1730 do_grow_release:
1731         if (unstuff) {
1732                 gfs2_inplace_release(ip);
1733 do_grow_qunlock:
1734                 gfs2_quota_unlock(ip);
1735         }
1736         return error;
1737 }
1738
1739 /**
1740  * gfs2_setattr_size - make a file a given size
1741  * @inode: the inode
1742  * @newsize: the size to make the file
1743  *
1744  * The file size can grow, shrink, or stay the same size. This
1745  * is called holding i_mutex and an exclusive glock on the inode
1746  * in question.
1747  *
1748  * Returns: errno
1749  */
1750
1751 int gfs2_setattr_size(struct inode *inode, u64 newsize)
1752 {
1753         struct gfs2_inode *ip = GFS2_I(inode);
1754         int ret;
1755
1756         BUG_ON(!S_ISREG(inode->i_mode));
1757
1758         ret = inode_newsize_ok(inode, newsize);
1759         if (ret)
1760                 return ret;
1761
1762         inode_dio_wait(inode);
1763
1764         ret = gfs2_rsqa_alloc(ip);
1765         if (ret)
1766                 goto out;
1767
1768         if (newsize >= inode->i_size) {
1769                 ret = do_grow(inode, newsize);
1770                 goto out;
1771         }
1772
1773         ret = do_shrink(inode, newsize);
1774 out:
1775         gfs2_rsqa_delete(ip, NULL);
1776         return ret;
1777 }
1778
1779 int gfs2_truncatei_resume(struct gfs2_inode *ip)
1780 {
1781         int error;
1782         error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
1783         if (!error)
1784                 error = trunc_end(ip);
1785         return error;
1786 }
1787
/**
 * gfs2_file_dealloc - deallocate a file's blocks
 * @ip: the inode
 *
 * Thin wrapper that calls punch_hole() with offset 0 and length 0.
 *
 * Returns: errno
 */

int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	int error;

	error = punch_hole(ip, 0, 0);
	return error;
}
1792
1793 /**
1794  * gfs2_free_journal_extents - Free cached journal bmap info
1795  * @jd: The journal
1796  *
1797  */
1798
1799 void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1800 {
1801         struct gfs2_journal_extent *jext;
1802
1803         while(!list_empty(&jd->extent_list)) {
1804                 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1805                 list_del(&jext->list);
1806                 kfree(jext);
1807         }
1808 }
1809
1810 /**
1811  * gfs2_add_jextent - Add or merge a new extent to extent cache
1812  * @jd: The journal descriptor
1813  * @lblock: The logical block at start of new extent
1814  * @dblock: The physical block at start of new extent
1815  * @blocks: Size of extent in fs blocks
1816  *
1817  * Returns: 0 on success or -ENOMEM
1818  */
1819
1820 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1821 {
1822         struct gfs2_journal_extent *jext;
1823
1824         if (!list_empty(&jd->extent_list)) {
1825                 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1826                 if ((jext->dblock + jext->blocks) == dblock) {
1827                         jext->blocks += blocks;
1828                         return 0;
1829                 }
1830         }
1831
1832         jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1833         if (jext == NULL)
1834                 return -ENOMEM;
1835         jext->dblock = dblock;
1836         jext->lblock = lblock;
1837         jext->blocks = blocks;
1838         list_add_tail(&jext->list, &jd->extent_list);
1839         jd->nr_extents++;
1840         return 0;
1841 }
1842
1843 /**
1844  * gfs2_map_journal_extents - Cache journal bmap info
1845  * @sdp: The super block
1846  * @jd: The journal to map
1847  *
1848  * Create a reusable "extent" mapping from all logical
1849  * blocks to all physical blocks for the given journal.  This will save
1850  * us time when writing journal blocks.  Most journals will have only one
1851  * extent that maps all their logical blocks.  That's because gfs2.mkfs
1852  * arranges the journal blocks sequentially to maximize performance.
1853  * So the extent would map the first block for the entire file length.
1854  * However, gfs2_jadd can happen while file activity is happening, so
1855  * those journals may not be sequential.  Less likely is the case where
1856  * the users created their own journals by mounting the metafs and
1857  * laying it out.  But it's still possible.  These journals might have
1858  * several extents.
1859  *
1860  * Returns: 0 on success, or error on failure
1861  */
1862
1863 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1864 {
1865         u64 lblock = 0;
1866         u64 lblock_stop;
1867         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1868         struct buffer_head bh;
1869         unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1870         u64 size;
1871         int rc;
1872
1873         lblock_stop = i_size_read(jd->jd_inode) >> shift;
1874         size = (lblock_stop - lblock) << shift;
1875         jd->nr_extents = 0;
1876         WARN_ON(!list_empty(&jd->extent_list));
1877
1878         do {
1879                 bh.b_state = 0;
1880                 bh.b_blocknr = 0;
1881                 bh.b_size = size;
1882                 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1883                 if (rc || !buffer_mapped(&bh))
1884                         goto fail;
1885                 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
1886                 if (rc)
1887                         goto fail;
1888                 size -= bh.b_size;
1889                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1890         } while(size > 0);
1891
1892         fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
1893                 jd->nr_extents);
1894         return 0;
1895
1896 fail:
1897         fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1898                 rc, jd->jd_jid,
1899                 (unsigned long long)(i_size_read(jd->jd_inode) - size),
1900                 jd->nr_extents);
1901         fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1902                 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1903                 bh.b_state, (unsigned long long)bh.b_size);
1904         gfs2_free_journal_extents(jd);
1905         return rc;
1906 }
1907
1908 /**
1909  * gfs2_write_alloc_required - figure out if a write will require an allocation
1910  * @ip: the file being written to
1911  * @offset: the offset to write to
1912  * @len: the number of bytes being written
1913  *
1914  * Returns: 1 if an alloc is required, 0 otherwise
1915  */
1916
1917 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1918                               unsigned int len)
1919 {
1920         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1921         struct buffer_head bh;
1922         unsigned int shift;
1923         u64 lblock, lblock_stop, size;
1924         u64 end_of_file;
1925
1926         if (!len)
1927                 return 0;
1928
1929         if (gfs2_is_stuffed(ip)) {
1930                 if (offset + len > gfs2_max_stuffed_size(ip))
1931                         return 1;
1932                 return 0;
1933         }
1934
1935         shift = sdp->sd_sb.sb_bsize_shift;
1936         BUG_ON(gfs2_is_dir(ip));
1937         end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
1938         lblock = offset >> shift;
1939         lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1940         if (lblock_stop > end_of_file)
1941                 return 1;
1942
1943         size = (lblock_stop - lblock) << shift;
1944         do {
1945                 bh.b_state = 0;
1946                 bh.b_size = size;
1947                 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1948                 if (!buffer_mapped(&bh))
1949                         return 1;
1950                 size -= bh.b_size;
1951                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1952         } while(size > 0);
1953
1954         return 0;
1955 }
1956
1957 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
1958 {
1959         struct gfs2_inode *ip = GFS2_I(inode);
1960         struct buffer_head *dibh;
1961         int error;
1962
1963         if (offset >= inode->i_size)
1964                 return 0;
1965         if (offset + length > inode->i_size)
1966                 length = inode->i_size - offset;
1967
1968         error = gfs2_meta_inode_buffer(ip, &dibh);
1969         if (error)
1970                 return error;
1971         gfs2_trans_add_meta(ip->i_gl, dibh);
1972         memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
1973                length);
1974         brelse(dibh);
1975         return 0;
1976 }
1977
1978 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
1979                                          loff_t length)
1980 {
1981         struct gfs2_sbd *sdp = GFS2_SB(inode);
1982         loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1983         int error;
1984
1985         while (length) {
1986                 struct gfs2_trans *tr;
1987                 loff_t chunk;
1988                 unsigned int offs;
1989
1990                 chunk = length;
1991                 if (chunk > max_chunk)
1992                         chunk = max_chunk;
1993
1994                 offs = offset & ~PAGE_MASK;
1995                 if (offs && chunk > PAGE_SIZE)
1996                         chunk = offs + ((chunk - offs) & PAGE_MASK);
1997
1998                 truncate_pagecache_range(inode, offset, chunk);
1999                 offset += chunk;
2000                 length -= chunk;
2001
2002                 tr = current->journal_info;
2003                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2004                         continue;
2005
2006                 gfs2_trans_end(sdp);
2007                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2008                 if (error)
2009                         return error;
2010         }
2011         return 0;
2012 }
2013
2014 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2015 {
2016         struct inode *inode = file_inode(file);
2017         struct gfs2_inode *ip = GFS2_I(inode);
2018         struct gfs2_sbd *sdp = GFS2_SB(inode);
2019         int error;
2020
2021         if (gfs2_is_jdata(ip))
2022                 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2023                                          GFS2_JTRUNC_REVOKES);
2024         else
2025                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2026         if (error)
2027                 return error;
2028
2029         if (gfs2_is_stuffed(ip)) {
2030                 error = stuffed_zero_range(inode, offset, length);
2031                 if (error)
2032                         goto out;
2033         } else {
2034                 unsigned int start_off, end_off, blocksize;
2035
2036                 blocksize = i_blocksize(inode);
2037                 start_off = offset & (blocksize - 1);
2038                 end_off = (offset + length) & (blocksize - 1);
2039                 if (start_off) {
2040                         unsigned int len = length;
2041                         if (length > blocksize - start_off)
2042                                 len = blocksize - start_off;
2043                         error = gfs2_block_zero_range(inode, offset, len);
2044                         if (error)
2045                                 goto out;
2046                         if (start_off + length < blocksize)
2047                                 end_off = 0;
2048                 }
2049                 if (end_off) {
2050                         error = gfs2_block_zero_range(inode,
2051                                 offset + length - end_off, end_off);
2052                         if (error)
2053                                 goto out;
2054                 }
2055         }
2056
2057         if (gfs2_is_jdata(ip)) {
2058                 BUG_ON(!current->journal_info);
2059                 gfs2_journaled_truncate_range(inode, offset, length);
2060         } else
2061                 truncate_pagecache_range(inode, offset, offset + length - 1);
2062
2063         file_update_time(file);
2064         mark_inode_dirty(inode);
2065
2066         if (current->journal_info)
2067                 gfs2_trans_end(sdp);
2068
2069         if (!gfs2_is_stuffed(ip))
2070                 error = punch_hole(ip, offset, length);
2071
2072 out:
2073         if (current->journal_info)
2074                 gfs2_trans_end(sdp);
2075         return error;
2076 }