fs/xfs/xfs_rw.c

   1 /*
   2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include "xfs.h"
  19 #include "xfs_fs.h"
  20 #include "xfs_types.h"
  21 #include "xfs_bit.h"
  22 #include "xfs_log.h"
  23 #include "xfs_inum.h"
  24 #include "xfs_trans.h"
  25 #include "xfs_sb.h"
  26 #include "xfs_ag.h"
  27 #include "xfs_dir2.h"
  28 #include "xfs_dmapi.h"
  29 #include "xfs_mount.h"
  30 #include "xfs_bmap_btree.h"
  31 #include "xfs_alloc_btree.h"
  32 #include "xfs_ialloc_btree.h"
  33 #include "xfs_dir2_sf.h"
  34 #include "xfs_attr_sf.h"
  35 #include "xfs_dinode.h"
  36 #include "xfs_inode.h"
  37 #include "xfs_inode_item.h"
  38 #include "xfs_itable.h"
  39 #include "xfs_btree.h"
  40 #include "xfs_alloc.h"
  41 #include "xfs_ialloc.h"
  42 #include "xfs_attr.h"
  43 #include "xfs_bmap.h"
  44 #include "xfs_error.h"
  45 #include "xfs_buf_item.h"
  46 #include "xfs_rw.h"
  47 #include "xfs_trace.h"
  48
  49 /*
  50  * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
  51  * which clears the setuid and setgid bits when a file is written.
  52  */
  53 int
  54 xfs_write_clear_setuid(
  55         xfs_inode_t     *ip)
  56 {
  57         xfs_mount_t     *mp;
  58         xfs_trans_t     *tp;
  59         int             error;
  60
  61         mp = ip->i_mount;
  62         tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
  63         if ((error = xfs_trans_reserve(tp, 0,
  64                                       XFS_WRITEID_LOG_RES(mp),
  65                                       0, 0, 0))) {
  66                 xfs_trans_cancel(tp, 0);
  67                 return error;
  68         }
  69         xfs_ilock(ip, XFS_ILOCK_EXCL);
  70         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  71         xfs_trans_ihold(tp, ip);
  72         ip->i_d.di_mode &= ~S_ISUID;
  73
  74         /*
  75          * Note that we don't have to worry about mandatory
  76          * file locking being disabled here because we only
  77          * clear the S_ISGID bit if the Group execute bit is
  78          * on, but if it was on then mandatory locking wouldn't
  79          * have been enabled.
  80          */
  81         if (ip->i_d.di_mode & S_IXGRP) {
  82                 ip->i_d.di_mode &= ~S_ISGID;
  83         }
  84         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
  85         xfs_trans_set_sync(tp);
  86         error = xfs_trans_commit(tp, 0);
  87         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  88         return 0;
  89 }
  90
  91 /*
  92  * Force a shutdown of the filesystem instantly while keeping
  93  * the filesystem consistent. We don't do an unmount here; just shutdown
  94  * the shop, make sure that absolutely nothing persistent happens to
  95  * this filesystem after this point.
  96  */
  97 void
  98 xfs_do_force_shutdown(
  99         xfs_mount_t     *mp,
 100         int             flags,
 101         char            *fname,
 102         int             lnnum)
 103 {
 104         int             logerror;
 105
 106         logerror = flags & SHUTDOWN_LOG_IO_ERROR;
 107
 108         if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
 109                 cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from "
 110                                  "line %d of file %s.  Return address = 0x%p",
 111                         mp->m_fsname, flags, lnnum, fname, __return_address);
 112         }
 113         /*
 114          * No need to duplicate efforts.
 115          */
 116         if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
 117                 return;
 118
 119         /*
 120          * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
 121          * queue up anybody new on the log reservations, and wakes up
 122          * everybody who's sleeping on log reservations to tell them
 123          * the bad news.
 124          */
 125         if (xfs_log_force_umount(mp, logerror))
 126                 return;
 127
 128         if (flags & SHUTDOWN_CORRUPT_INCORE) {
 129                 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp,
 130     "Corruption of in-memory data detected.  Shutting down filesystem: %s",
 131                         mp->m_fsname);
 132                 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
 133                         xfs_stack_trace();
 134                 }
 135         } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
 136                 if (logerror) {
 137                         xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp,
 138                 "Log I/O Error Detected.  Shutting down filesystem: %s",
 139                                 mp->m_fsname);
 140                 } else if (flags & SHUTDOWN_DEVICE_REQ) {
 141                         xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
 142                 "All device paths lost.  Shutting down filesystem: %s",
 143                                 mp->m_fsname);
 144                 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
 145                         xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
 146                 "I/O Error Detected.  Shutting down filesystem: %s",
 147                                 mp->m_fsname);
 148                 }
 149         }
 150         if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
 151                 cmn_err(CE_ALERT, "Please umount the filesystem, "
 152                                   "and rectify the problem(s)");
 153         }
 154 }
 155
 156
 157 /*
 158  * Called when we want to stop a buffer from getting written or read.
 159  * We attach the EIO error, muck with its flags, and call biodone
 160  * so that the proper iodone callbacks get called.
 161  */
 162 int
 163 xfs_bioerror(
 164         xfs_buf_t *bp)
 165 {
 166
 167 #ifdef XFSERRORDEBUG
 168         ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
 169 #endif
 170
 171         /*
 172          * No need to wait until the buffer is unpinned.
 173          * We aren't flushing it.
 174          */
 175         XFS_BUF_ERROR(bp, EIO);
 176         /*
 177          * We're calling biodone, so delete B_DONE flag. Either way
 178          * we have to call the iodone callback, and calling biodone
 179          * probably is the best way since it takes care of
 180          * GRIO as well.
 181          */
 182         XFS_BUF_UNREAD(bp);
 183         XFS_BUF_UNDELAYWRITE(bp);
 184         XFS_BUF_UNDONE(bp);
 185         XFS_BUF_STALE(bp);
 186
 187         XFS_BUF_CLR_BDSTRAT_FUNC(bp);
 188         xfs_biodone(bp);
 189
 190         return (EIO);
 191 }
 192
 193 /*
 194  * Same as xfs_bioerror, except that we are releasing the buffer
 195  * here ourselves, and avoiding the biodone call.
 196  * This is meant for userdata errors; metadata bufs come with
 197  * iodone functions attached, so that we can track down errors.
 198  */
 199 int
 200 xfs_bioerror_relse(
 201         xfs_buf_t *bp)
 202 {
 203         int64_t fl;
 204
 205         ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
 206         ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
 207
 208         fl = XFS_BUF_BFLAGS(bp);
 209         /*
 210          * No need to wait until the buffer is unpinned.
 211          * We aren't flushing it.
 212          *
 213          * chunkhold expects B_DONE to be set, whether
 214          * we actually finish the I/O or not. We don't want to
 215          * change that interface.
 216          */
 217         XFS_BUF_UNREAD(bp);
 218         XFS_BUF_UNDELAYWRITE(bp);
 219         XFS_BUF_DONE(bp);
 220         XFS_BUF_STALE(bp);
 221         XFS_BUF_CLR_IODONE_FUNC(bp);
 222         XFS_BUF_CLR_BDSTRAT_FUNC(bp);
 223         if (!(fl & XFS_B_ASYNC)) {
 224                 /*
 225                  * Mark b_error and B_ERROR _both_.
 226                  * Lot's of chunkcache code assumes that.
 227                  * There's no reason to mark error for
 228                  * ASYNC buffers.
 229                  */
 230                 XFS_BUF_ERROR(bp, EIO);
 231                 XFS_BUF_FINISH_IOWAIT(bp);
 232         } else {
 233                 xfs_buf_relse(bp);
 234         }
 235         return (EIO);
 236 }
 237
 238 /*
 239  * Prints out an ALERT message about I/O error.
 240  */
 241 void
 242 xfs_ioerror_alert(
 243         char                    *func,
 244         struct xfs_mount        *mp,
 245         xfs_buf_t               *bp,
 246         xfs_daddr_t             blkno)
 247 {
 248         cmn_err(CE_ALERT,
 249  "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
 250  "       (\"%s\") error %d buf count %zd",
 251                 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
 252                 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
 253                 (__uint64_t)blkno, func,
 254                 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
 255 }
 256
 257 /*
 258  * This isn't an absolute requirement, but it is
 259  * just a good idea to call xfs_read_buf instead of
 260  * directly doing a read_buf call. For one, we shouldn't
 261  * be doing this disk read if we are in SHUTDOWN state anyway,
 262  * so this stops that from happening. Secondly, this does all
 263  * the error checking stuff and the brelse if appropriate for
 264  * the caller, so the code can be a little leaner.
 265  */
 266
 267 int
 268 xfs_read_buf(
 269         struct xfs_mount *mp,
 270         xfs_buftarg_t    *target,
 271         xfs_daddr_t      blkno,
 272         int              len,
 273         uint             flags,
 274         xfs_buf_t        **bpp)
 275 {
 276         xfs_buf_t        *bp;
 277         int              error;
 278
 279         if (!flags)
 280                 flags = XBF_LOCK | XBF_MAPPED;
 281
 282         bp = xfs_buf_read(target, blkno, len, flags);
 283         if (!bp)
 284                 return XFS_ERROR(EIO);
 285         error = XFS_BUF_GETERROR(bp);
 286         if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
 287                 *bpp = bp;
 288         } else {
 289                 *bpp = NULL;
 290                 if (error) {
 291                         xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp));
 292                 } else {
 293                         error = XFS_ERROR(EIO);
 294                 }
 295                 if (bp) {
 296                         XFS_BUF_UNDONE(bp);
 297                         XFS_BUF_UNDELAYWRITE(bp);
 298                         XFS_BUF_STALE(bp);
 299                         /*
 300                          * brelse clears B_ERROR and b_error
 301                          */
 302                         xfs_buf_relse(bp);
 303                 }
 304         }
 305         return (error);
 306 }
 307
 308 /*
 309  * Wrapper around bwrite() so that we can trap
 310  * write errors, and act accordingly.
 311  */
 312 int
 313 xfs_bwrite(
 314         struct xfs_mount *mp,
 315         struct xfs_buf   *bp)
 316 {
 317         int     error;
 318
 319         /*
 320          * XXXsup how does this work for quotas.
 321          */
 322         XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
 323         bp->b_mount = mp;
 324         XFS_BUF_WRITE(bp);
 325
 326         if ((error = XFS_bwrite(bp))) {
 327                 ASSERT(mp);
 328                 /*
 329                  * Cannot put a buftrace here since if the buffer is not
 330                  * B_HOLD then we will brelse() the buffer before returning
 331                  * from bwrite and we could be tracing a buffer that has
 332                  * been reused.
 333                  */
 334                 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
 335         }
 336         return (error);
 337 }
 338
 339 /*
 340  * helper function to extract extent size hint from inode
 341  */
 342 xfs_extlen_t
 343 xfs_get_extsz_hint(
 344         struct xfs_inode        *ip)
 345 {
 346         xfs_extlen_t            extsz;
 347
 348         if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
 349                 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
 350                                 ? ip->i_d.di_extsize
 351                                 : ip->i_mount->m_sb.sb_rextsize;
 352                 ASSERT(extsz);
 353         } else {
 354                 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
 355                                 ? ip->i_d.di_extsize : 0;
 356         }
 357
 358         return extsz;
 359 }