fs/xfs/xfs_super.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4  * All Rights Reserved.
   5  */
   6
   7 #include "xfs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_log_format.h"
  11 #include "xfs_trans_resv.h"
  12 #include "xfs_sb.h"
  13 #include "xfs_mount.h"
  14 #include "xfs_da_format.h"
  15 #include "xfs_inode.h"
  16 #include "xfs_btree.h"
  17 #include "xfs_bmap.h"
  18 #include "xfs_alloc.h"
  19 #include "xfs_error.h"
  20 #include "xfs_fsops.h"
  21 #include "xfs_trans.h"
  22 #include "xfs_buf_item.h"
  23 #include "xfs_log.h"
  24 #include "xfs_log_priv.h"
  25 #include "xfs_da_btree.h"
  26 #include "xfs_dir2.h"
  27 #include "xfs_extfree_item.h"
  28 #include "xfs_mru_cache.h"
  29 #include "xfs_inode_item.h"
  30 #include "xfs_icache.h"
  31 #include "xfs_trace.h"
  32 #include "xfs_icreate_item.h"
  33 #include "xfs_filestream.h"
  34 #include "xfs_quota.h"
  35 #include "xfs_sysfs.h"
  36 #include "xfs_ondisk.h"
  37 #include "xfs_rmap_item.h"
  38 #include "xfs_refcount_item.h"
  39 #include "xfs_bmap_item.h"
  40 #include "xfs_reflink.h"
  41 #include "xfs_defer.h"
  42
  43 #include <linux/namei.h>
  44 #include <linux/dax.h>
  45 #include <linux/init.h>
  46 #include <linux/slab.h>
  47 #include <linux/magic.h>
  48 #include <linux/mount.h>
  49 #include <linux/mempool.h>
  50 #include <linux/writeback.h>
  51 #include <linux/kthread.h>
  52 #include <linux/freezer.h>
  53 #include <linux/parser.h>
  54
/*
 * Tentative definition of the superblock operations table so it can be
 * referenced before its initializer (which is not visible in this part of
 * the file).
 */
static const struct super_operations xfs_super_operations;
/*
 * bio_set with external linkage.
 * NOTE(review): presumably used for ioend bio allocation - confirm at users.
 */
struct bio_set xfs_ioend_bioset;

static struct kset *xfs_kset;           /* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
#endif
  62
  63 /*
  64  * Table driven mount option parser.
  65  */
  66 enum {
  67         Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
  68         Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
  69         Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
  70         Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
  71         Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
  72         Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
  73         Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
  74         Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
  75         Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
  76 };
  77
  78 static const match_table_t tokens = {
  79         {Opt_logbufs,   "logbufs=%u"},  /* number of XFS log buffers */
  80         {Opt_logbsize,  "logbsize=%s"}, /* size of XFS log buffers */
  81         {Opt_logdev,    "logdev=%s"},   /* log device */
  82         {Opt_rtdev,     "rtdev=%s"},    /* realtime I/O device */
  83         {Opt_biosize,   "biosize=%u"},  /* log2 of preferred buffered io size */
  84         {Opt_wsync,     "wsync"},       /* safe-mode nfs compatible mount */
  85         {Opt_noalign,   "noalign"},     /* turn off stripe alignment */
  86         {Opt_swalloc,   "swalloc"},     /* turn on stripe width allocation */
  87         {Opt_sunit,     "sunit=%u"},    /* data volume stripe unit */
  88         {Opt_swidth,    "swidth=%u"},   /* data volume stripe width */
  89         {Opt_nouuid,    "nouuid"},      /* ignore filesystem UUID */
  90         {Opt_mtpt,      "mtpt"},        /* filesystem mount point */
  91         {Opt_grpid,     "grpid"},       /* group-ID from parent directory */
  92         {Opt_nogrpid,   "nogrpid"},     /* group-ID from current process */
  93         {Opt_bsdgroups, "bsdgroups"},   /* group-ID from parent directory */
  94         {Opt_sysvgroups,"sysvgroups"},  /* group-ID from current process */
  95         {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
  96         {Opt_norecovery,"norecovery"},  /* don't run XFS recovery */
  97         {Opt_inode64,   "inode64"},     /* inodes can be allocated anywhere */
  98         {Opt_inode32,   "inode32"},     /* inode allocation limited to
  99                                          * XFS_MAXINUMBER_32 */
 100         {Opt_ikeep,     "ikeep"},       /* do not free empty inode clusters */
 101         {Opt_noikeep,   "noikeep"},     /* free empty inode clusters */
 102         {Opt_largeio,   "largeio"},     /* report large I/O sizes in stat() */
 103         {Opt_nolargeio, "nolargeio"},   /* do not report large I/O sizes
 104                                          * in stat(). */
 105         {Opt_attr2,     "attr2"},       /* do use attr2 attribute format */
 106         {Opt_noattr2,   "noattr2"},     /* do not use attr2 attribute format */
 107         {Opt_filestreams,"filestreams"},/* use filestreams allocator */
 108         {Opt_quota,     "quota"},       /* disk quotas (user) */
 109         {Opt_noquota,   "noquota"},     /* no quotas */
 110         {Opt_usrquota,  "usrquota"},    /* user quota enabled */
 111         {Opt_grpquota,  "grpquota"},    /* group quota enabled */
 112         {Opt_prjquota,  "prjquota"},    /* project quota enabled */
 113         {Opt_uquota,    "uquota"},      /* user quota (IRIX variant) */
 114         {Opt_gquota,    "gquota"},      /* group quota (IRIX variant) */
 115         {Opt_pquota,    "pquota"},      /* project quota (IRIX variant) */
 116         {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
 117         {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
 118         {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
 119         {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
 120         {Opt_discard,   "discard"},     /* Discard unused blocks */
 121         {Opt_nodiscard, "nodiscard"},   /* Do not discard unused blocks */
 122         {Opt_dax,       "dax"},         /* Enable direct access to bdev pages */
 123         {Opt_err,       NULL},
 124 };
 125
 126
 127 STATIC int
 128 suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
 129 {
 130         int     last, shift_left_factor = 0, _res;
 131         char    *value;
 132         int     ret = 0;
 133
 134         value = match_strdup(s);
 135         if (!value)
 136                 return -ENOMEM;
 137
 138         last = strlen(value) - 1;
 139         if (value[last] == 'K' || value[last] == 'k') {
 140                 shift_left_factor = 10;
 141                 value[last] = '\0';
 142         }
 143         if (value[last] == 'M' || value[last] == 'm') {
 144                 shift_left_factor = 20;
 145                 value[last] = '\0';
 146         }
 147         if (value[last] == 'G' || value[last] == 'g') {
 148                 shift_left_factor = 30;
 149                 value[last] = '\0';
 150         }
 151
 152         if (kstrtoint(value, base, &_res))
 153                 ret = -EINVAL;
 154         kfree(value);
 155         *res = _res << shift_left_factor;
 156         return ret;
 157 }
 158
/*
 * This function fills in xfs_mount_t fields based on the VFS superblock
 * flags and the mount option string.
 *
 * mp:      mount structure to fill in; mp->m_super is read for the VFS flags
 *          and the filesystem name.
 * options: comma separated option string, or NULL when no options were
 *          given.  The string is split in place by strsep().
 *
 * Returns 0 on success, -ENOMEM if a name string cannot be duplicated, or
 * -EINVAL for an unknown, malformed, out of range or inconsistent option.
 *
 * Note: the superblock has _not_ yet been read in.
 *
 * Note that this function leaks the various device name allocations on
 * failure.  The caller takes care of them.
 *
 * *sb is const because this is also used to test options on the remount
 * path, and we don't want this to have any side effects at remount time.
 * Today this function does not change *sb, but just to future-proof...
 */
STATIC int
xfs_parseargs(
        struct xfs_mount        *mp,
        char                    *options)
{
        const struct super_block *sb = mp->m_super;
        char                    *p;
        substring_t             args[MAX_OPT_ARGS];
        int                     dsunit = 0;
        int                     dswidth = 0;
        int                     iosize = 0;
        uint8_t                 iosizelog = 0;

        /*
         * set up the mount name first so all the errors will refer to the
         * correct device.
         */
        mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
        if (!mp->m_fsname)
                return -ENOMEM;
        /* the stored length includes the terminating NUL */
        mp->m_fsname_len = strlen(mp->m_fsname) + 1;

        /*
         * Copy binary VFS mount flags we are interested in.
         */
        if (sb_rdonly(sb))
                mp->m_flags |= XFS_MOUNT_RDONLY;
        if (sb->s_flags & SB_DIRSYNC)
                mp->m_flags |= XFS_MOUNT_DIRSYNC;
        if (sb->s_flags & SB_SYNCHRONOUS)
                mp->m_flags |= XFS_MOUNT_WSYNC;

        /*
         * Set some default flags that could be cleared by the mount option
         * parsing.
         */
        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;

        /*
         * These can be overridden by the mount option parsing.
         * -1 means "not specified" and bypasses the range checks below.
         */
        mp->m_logbufs = -1;
        mp->m_logbsize = -1;

        if (!options)
                goto done;

        /*
         * Options are applied in the order given, so when two options touch
         * the same flag or value the later one wins.
         */
        while ((p = strsep(&options, ",")) != NULL) {
                int             token;

                /* skip empty fields, e.g. from consecutive commas */
                if (!*p)
                        continue;

                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_logbufs:
                        if (match_int(args, &mp->m_logbufs))
                                return -EINVAL;
                        break;
                case Opt_logbsize:
                        if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
                                return -EINVAL;
                        break;
                case Opt_logdev:
                        /* drop any name left by an earlier logdev= option */
                        kfree(mp->m_logname);
                        mp->m_logname = match_strdup(args);
                        if (!mp->m_logname)
                                return -ENOMEM;
                        break;
                case Opt_mtpt:
                        xfs_warn(mp, "%s option not allowed on this system", p);
                        return -EINVAL;
                case Opt_rtdev:
                        /* drop any name left by an earlier rtdev= option */
                        kfree(mp->m_rtname);
                        mp->m_rtname = match_strdup(args);
                        if (!mp->m_rtname)
                                return -ENOMEM;
                        break;
                case Opt_allocsize:
                case Opt_biosize:
                        if (suffix_kstrtoint(args, 10, &iosize))
                                return -EINVAL;
                        /*
                         * ffs() yields the position of the lowest set bit,
                         * so only that bit of iosize determines the log2
                         * value; a zero iosize produces 255 in the uint8_t
                         * and is rejected by the range check further down.
                         * NOTE(review): the token table describes biosize
                         * as a log2 value, but it is parsed here as a size
                         * exactly like allocsize - confirm this is intended.
                         */
                        iosizelog = ffs(iosize) - 1;
                        break;
                case Opt_grpid:
                case Opt_bsdgroups:
                        mp->m_flags |= XFS_MOUNT_GRPID;
                        break;
                case Opt_nogrpid:
                case Opt_sysvgroups:
                        mp->m_flags &= ~XFS_MOUNT_GRPID;
                        break;
                case Opt_wsync:
                        mp->m_flags |= XFS_MOUNT_WSYNC;
                        break;
                case Opt_norecovery:
                        mp->m_flags |= XFS_MOUNT_NORECOVERY;
                        break;
                case Opt_noalign:
                        mp->m_flags |= XFS_MOUNT_NOALIGN;
                        break;
                case Opt_swalloc:
                        mp->m_flags |= XFS_MOUNT_SWALLOC;
                        break;
                case Opt_sunit:
                        if (match_int(args, &dsunit))
                                return -EINVAL;
                        break;
                case Opt_swidth:
                        if (match_int(args, &dswidth))
                                return -EINVAL;
                        break;
                case Opt_inode32:
                        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
                        break;
                case Opt_inode64:
                        mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
                        break;
                case Opt_nouuid:
                        mp->m_flags |= XFS_MOUNT_NOUUID;
                        break;
                case Opt_ikeep:
                        mp->m_flags |= XFS_MOUNT_IKEEP;
                        break;
                case Opt_noikeep:
                        mp->m_flags &= ~XFS_MOUNT_IKEEP;
                        break;
                case Opt_largeio:
                        mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
                        break;
                case Opt_nolargeio:
                        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
                        break;
                case Opt_attr2:
                        mp->m_flags |= XFS_MOUNT_ATTR2;
                        break;
                case Opt_noattr2:
                        mp->m_flags &= ~XFS_MOUNT_ATTR2;
                        mp->m_flags |= XFS_MOUNT_NOATTR2;
                        break;
                case Opt_filestreams:
                        mp->m_flags |= XFS_MOUNT_FILESTREAMS;
                        break;
                case Opt_noquota:
                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
                        mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
                        mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
                        break;
                case Opt_quota:
                case Opt_uquota:
                case Opt_usrquota:
                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
                                         XFS_UQUOTA_ENFD);
                        break;
                case Opt_qnoenforce:
                case Opt_uqnoenforce:
                        /* accounting stays on, only enforcement is dropped */
                        mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_UQUOTA_ENFD;
                        break;
                case Opt_pquota:
                case Opt_prjquota:
                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
                                         XFS_PQUOTA_ENFD);
                        break;
                case Opt_pqnoenforce:
                        mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_PQUOTA_ENFD;
                        break;
                case Opt_gquota:
                case Opt_grpquota:
                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
                                         XFS_GQUOTA_ENFD);
                        break;
                case Opt_gqnoenforce:
                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_GQUOTA_ENFD;
                        break;
                case Opt_discard:
                        mp->m_flags |= XFS_MOUNT_DISCARD;
                        break;
                case Opt_nodiscard:
                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
                        break;
#ifdef CONFIG_FS_DAX
                case Opt_dax:
                        mp->m_flags |= XFS_MOUNT_DAX;
                        break;
#endif
                /*
                 * Also reached for "dax" when CONFIG_FS_DAX is not set,
                 * because the case above is compiled out.
                 */
                default:
                        xfs_warn(mp, "unknown mount option [%s].", p);
                        return -EINVAL;
                }
        }

        /*
         * no recovery flag requires a read-only mount
         */
        if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
                xfs_warn(mp, "no-recovery mounts must be read-only.");
                return -EINVAL;
        }

        if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
                xfs_warn(mp,
        "sunit and swidth options incompatible with the noalign option");
                return -EINVAL;
        }

#ifndef CONFIG_XFS_QUOTA
        if (XFS_IS_QUOTA_RUNNING(mp)) {
                xfs_warn(mp, "quota support not available in this kernel.");
                return -EINVAL;
        }
#endif

        if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
                xfs_warn(mp, "sunit and swidth must be specified together");
                return -EINVAL;
        }

        if (dsunit && (dswidth % dsunit != 0)) {
                xfs_warn(mp,
        "stripe width (%d) must be a multiple of the stripe unit (%d)",
                        dswidth, dsunit);
                return -EINVAL;
        }

        /*
         * Jumped to directly when options is NULL, which bypasses the
         * cross-option consistency checks above.
         */
done:
        if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
                /*
                 * At this point the superblock has not been read
                 * in, therefore we do not know the block size.
                 * Before the mount call ends we will convert
                 * these to FSBs.
                 */
                mp->m_dalign = dsunit;
                mp->m_swidth = dswidth;
        }

        /* -1 (unset) and 0 are accepted without a range check */
        if (mp->m_logbufs != -1 &&
            mp->m_logbufs != 0 &&
            (mp->m_logbufs < XLOG_MIN_ICLOGS ||
             mp->m_logbufs > XLOG_MAX_ICLOGS)) {
                xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
                        mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
                return -EINVAL;
        }
        /* likewise -1 and 0 skip the size and power-of-two checks */
        if (mp->m_logbsize != -1 &&
            mp->m_logbsize !=  0 &&
            (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
             mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
             !is_power_of_2(mp->m_logbsize))) {
                xfs_warn(mp,
                        "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
                        mp->m_logbsize);
                return -EINVAL;
        }

        /* iosizelog stays 0 unless allocsize=/biosize= was parsed */
        if (iosizelog) {
                if (iosizelog > XFS_MAX_IO_LOG ||
                    iosizelog < XFS_MIN_IO_LOG) {
                        xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
                                iosizelog, XFS_MIN_IO_LOG,
                                XFS_MAX_IO_LOG);
                        return -EINVAL;
                }

                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
                mp->m_readio_log = iosizelog;
                mp->m_writeio_log = iosizelog;
        }

        return 0;
}
 445
 446 struct proc_xfs_info {
 447         uint64_t        flag;
 448         char            *str;
 449 };
 450
 451 STATIC int
 452 xfs_showargs(
 453         struct xfs_mount        *mp,
 454         struct seq_file         *m)
 455 {
 456         static struct proc_xfs_info xfs_info_set[] = {
 457                 /* the few simple ones we can get from the mount struct */
 458                 { XFS_MOUNT_IKEEP,              ",ikeep" },
 459                 { XFS_MOUNT_WSYNC,              ",wsync" },
 460                 { XFS_MOUNT_NOALIGN,            ",noalign" },
 461                 { XFS_MOUNT_SWALLOC,            ",swalloc" },
 462                 { XFS_MOUNT_NOUUID,             ",nouuid" },
 463                 { XFS_MOUNT_NORECOVERY,         ",norecovery" },
 464                 { XFS_MOUNT_ATTR2,              ",attr2" },
 465                 { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
 466                 { XFS_MOUNT_GRPID,              ",grpid" },
 467                 { XFS_MOUNT_DISCARD,            ",discard" },
 468                 { XFS_MOUNT_SMALL_INUMS,        ",inode32" },
 469                 { XFS_MOUNT_DAX,                ",dax" },
 470                 { 0, NULL }
 471         };
 472         static struct proc_xfs_info xfs_info_unset[] = {
 473                 /* the few simple ones we can get from the mount struct */
 474                 { XFS_MOUNT_COMPAT_IOSIZE,      ",largeio" },
 475                 { XFS_MOUNT_SMALL_INUMS,        ",inode64" },
 476                 { 0, NULL }
 477         };
 478         struct proc_xfs_info    *xfs_infop;
 479
 480         for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
 481                 if (mp->m_flags & xfs_infop->flag)
 482                         seq_puts(m, xfs_infop->str);
 483         }
 484         for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
 485                 if (!(mp->m_flags & xfs_infop->flag))
 486                         seq_puts(m, xfs_infop->str);
 487         }
 488
 489         if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
 490                 seq_printf(m, ",allocsize=%dk",
 491                                 (int)(1 << mp->m_writeio_log) >> 10);
 492
 493         if (mp->m_logbufs > 0)
 494                 seq_printf(m, ",logbufs=%d", mp->m_logbufs);
 495         if (mp->m_logbsize > 0)
 496                 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
 497
 498         if (mp->m_logname)
 499                 seq_show_option(m, "logdev", mp->m_logname);
 500         if (mp->m_rtname)
 501                 seq_show_option(m, "rtdev", mp->m_rtname);
 502
 503         if (mp->m_dalign > 0)
 504                 seq_printf(m, ",sunit=%d",
 505                                 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
 506         if (mp->m_swidth > 0)
 507                 seq_printf(m, ",swidth=%d",
 508                                 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 509
 510         if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
 511                 seq_puts(m, ",usrquota");
 512         else if (mp->m_qflags & XFS_UQUOTA_ACCT)
 513                 seq_puts(m, ",uqnoenforce");
 514
 515         if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 516                 if (mp->m_qflags & XFS_PQUOTA_ENFD)
 517                         seq_puts(m, ",prjquota");
 518                 else
 519                         seq_puts(m, ",pqnoenforce");
 520         }
 521         if (mp->m_qflags & XFS_GQUOTA_ACCT) {
 522                 if (mp->m_qflags & XFS_GQUOTA_ENFD)
 523                         seq_puts(m, ",grpquota");
 524                 else
 525                         seq_puts(m, ",gqnoenforce");
 526         }
 527
 528         if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
 529                 seq_puts(m, ",noquota");
 530
 531         return 0;
 532 }
 533 static uint64_t
 534 xfs_max_file_offset(
 535         unsigned int            blockshift)
 536 {
 537         unsigned int            pagefactor = 1;
 538         unsigned int            bitshift = BITS_PER_LONG - 1;
 539
 540         /* Figure out maximum filesize, on Linux this can depend on
 541          * the filesystem blocksize (on 32 bit platforms).
 542          * __block_write_begin does this in an [unsigned] long...
 543          *      page->index << (PAGE_SHIFT - bbits)
 544          * So, for page sized blocks (4K on 32 bit platforms),
 545          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
 546          *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
 547          * but for smaller blocksizes it is less (bbits = log2 bsize).
 548          * Note1: get_block_t takes a long (implicit cast from above)
 549          * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
 550          * can optionally convert the [unsigned] long from above into
 551          * an [unsigned] long long.
 552          */
 553
 554 #if BITS_PER_LONG == 32
 555 # if defined(CONFIG_LBDAF)
 556         ASSERT(sizeof(sector_t) == 8);
 557         pagefactor = PAGE_SIZE;
 558         bitshift = BITS_PER_LONG;
 559 # else
 560         pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
 561 # endif
 562 #endif
 563
 564         return (((uint64_t)pagefactor) << bitshift) - 1;
 565 }
 566
 567 /*
 568  * Set parameters for inode allocation heuristics, taking into account
 569  * filesystem size and inode32/inode64 mount options; i.e. specifically
 570  * whether or not XFS_MOUNT_SMALL_INUMS is set.
 571  *
 572  * Inode allocation patterns are altered only if inode32 is requested
 573  * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 574  * If altered, XFS_MOUNT_32BITINODES is set as well.
 575  *
 576  * An agcount independent of that in the mount structure is provided
 577  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 578  * to the potentially higher ag count.
 579  *
 580  * Returns the maximum AG index which may contain inodes.
 581  */
 582 xfs_agnumber_t
 583 xfs_set_inode_alloc(
 584         struct xfs_mount *mp,
 585         xfs_agnumber_t  agcount)
 586 {
 587         xfs_agnumber_t  index;
 588         xfs_agnumber_t  maxagi = 0;
 589         xfs_sb_t        *sbp = &mp->m_sb;
 590         xfs_agnumber_t  max_metadata;
 591         xfs_agino_t     agino;
 592         xfs_ino_t       ino;
 593
 594         /*
 595          * Calculate how much should be reserved for inodes to meet
 596          * the max inode percentage.  Used only for inode32.
 597          */
 598         if (mp->m_maxicount) {
 599                 uint64_t        icount;
 600
 601                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 602                 do_div(icount, 100);
 603                 icount += sbp->sb_agblocks - 1;
 604                 do_div(icount, sbp->sb_agblocks);
 605                 max_metadata = icount;
 606         } else {
 607                 max_metadata = agcount;
 608         }
 609
 610         /* Get the last possible inode in the filesystem */
 611         agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
 612         ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
 613
 614         /*
 615          * If user asked for no more than 32-bit inodes, and the fs is
 616          * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
 617          * the allocator to accommodate the request.
 618          */
 619         if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
 620                 mp->m_flags |= XFS_MOUNT_32BITINODES;
 621         else
 622                 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 623
 624         for (index = 0; index < agcount; index++) {
 625                 struct xfs_perag        *pag;
 626
 627                 ino = XFS_AGINO_TO_INO(mp, index, agino);
 628
 629                 pag = xfs_perag_get(mp, index);
 630
 631                 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
 632                         if (ino > XFS_MAXINUMBER_32) {
 633                                 pag->pagi_inodeok = 0;
 634                                 pag->pagf_metadata = 0;
 635                         } else {
 636                                 pag->pagi_inodeok = 1;
 637                                 maxagi++;
 638                                 if (index < max_metadata)
 639                                         pag->pagf_metadata = 1;
 640                                 else
 641                                         pag->pagf_metadata = 0;
 642                         }
 643                 } else {
 644                         pag->pagi_inodeok = 1;
 645                         pag->pagf_metadata = 0;
 646                 }
 647
 648                 xfs_perag_put(pag);
 649         }
 650
 651         return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
 652 }
 653
 654 STATIC int
 655 xfs_blkdev_get(
 656         xfs_mount_t             *mp,
 657         const char              *name,
 658         struct block_device     **bdevp)
 659 {
 660         int                     error = 0;
 661
 662         *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 663                                     mp);
 664         if (IS_ERR(*bdevp)) {
 665                 error = PTR_ERR(*bdevp);
 666                 xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
 667         }
 668
 669         return error;
 670 }
 671
 672 STATIC void
 673 xfs_blkdev_put(
 674         struct block_device     *bdev)
 675 {
 676         if (bdev)
 677                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 678 }
 679
 680 void
 681 xfs_blkdev_issue_flush(
 682         xfs_buftarg_t           *buftarg)
 683 {
 684         blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
 685 }
 686
 687 STATIC void
 688 xfs_close_devices(
 689         struct xfs_mount        *mp)
 690 {
 691         struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
 692
 693         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 694                 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
 695                 struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
 696
 697                 xfs_free_buftarg(mp->m_logdev_targp);
 698                 xfs_blkdev_put(logdev);
 699                 fs_put_dax(dax_logdev);
 700         }
 701         if (mp->m_rtdev_targp) {
 702                 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
 703                 struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
 704
 705                 xfs_free_buftarg(mp->m_rtdev_targp);
 706                 xfs_blkdev_put(rtdev);
 707                 fs_put_dax(dax_rtdev);
 708         }
 709         xfs_free_buftarg(mp->m_ddev_targp);
 710         fs_put_dax(dax_ddev);
 711 }
 712
/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 *
 * On success, mp->m_ddev_targp and mp->m_logdev_targp are set (the log
 * target aliases the data target when there is no separate log device),
 * mp->m_rtdev_targp is set when a realtime device name was supplied, and
 * 0 is returned.  On failure a negative errno is returned and the error
 * labels at the bottom unwind in reverse order of acquisition.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct block_device	*ddev = mp->m_super->s_bdev;
	struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
	struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
	struct block_device	*logdev = NULL, *rtdev = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
		if (error)
			goto out;
		dax_logdev = fs_dax_get_by_bdev(logdev);
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
		if (error)
			goto out_close_logdev;

		/* The realtime device must be distinct from both other devices. */
		if (rtdev == ddev || rtdev == logdev) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
		dax_rtdev = fs_dax_get_by_bdev(rtdev);
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	/* Every failure from here on is an allocation failure. */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev && logdev != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		/* No separate log device: the log shares the data target. */
		mp->m_logdev_targp = mp->m_ddev_targp;
	}

	return 0;

	/* Error unwinding: each label releases one stage, then falls through. */
 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
	/* rtdev and dax_rtdev may still be NULL when we arrive here. */
	xfs_blkdev_put(rtdev);
	fs_put_dax(dax_rtdev);
 out_close_logdev:
	if (logdev && logdev != ddev) {
		xfs_blkdev_put(logdev);
		fs_put_dax(dax_logdev);
	}
 out:
	fs_put_dax(dax_ddev);
	return error;
}
 798
 799 /*
 800  * Setup xfs_mount buffer target pointers based on superblock
 801  */
 802 STATIC int
 803 xfs_setup_devices(
 804         struct xfs_mount        *mp)
 805 {
 806         int                     error;
 807
 808         error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
 809         if (error)
 810                 return error;
 811
 812         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
 813                 unsigned int    log_sector_size = BBSIZE;
 814
 815                 if (xfs_sb_version_hassector(&mp->m_sb))
 816                         log_sector_size = mp->m_sb.sb_logsectsize;
 817                 error = xfs_setsize_buftarg(mp->m_logdev_targp,
 818                                             log_sector_size);
 819                 if (error)
 820                         return error;
 821         }
 822         if (mp->m_rtdev_targp) {
 823                 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
 824                                             mp->m_sb.sb_sectsize);
 825                 if (error)
 826                         return error;
 827         }
 828
 829         return 0;
 830 }
 831
 832 STATIC int
 833 xfs_init_mount_workqueues(
 834         struct xfs_mount        *mp)
 835 {
 836         mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
 837                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
 838         if (!mp->m_buf_workqueue)
 839                 goto out;
 840
 841         mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
 842                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 843         if (!mp->m_data_workqueue)
 844                 goto out_destroy_buf;
 845
 846         mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
 847                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 848         if (!mp->m_unwritten_workqueue)
 849                 goto out_destroy_data_iodone_queue;
 850
 851         mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
 852                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 853         if (!mp->m_cil_workqueue)
 854                 goto out_destroy_unwritten;
 855
 856         mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
 857                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 858         if (!mp->m_reclaim_workqueue)
 859                 goto out_destroy_cil;
 860
 861         mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
 862                         WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
 863                         mp->m_fsname);
 864         if (!mp->m_log_workqueue)
 865                 goto out_destroy_reclaim;
 866
 867         mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
 868                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
 869         if (!mp->m_eofblocks_workqueue)
 870                 goto out_destroy_log;
 871
 872         mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
 873                                                mp->m_fsname);
 874         if (!mp->m_sync_workqueue)
 875                 goto out_destroy_eofb;
 876
 877         return 0;
 878
 879 out_destroy_eofb:
 880         destroy_workqueue(mp->m_eofblocks_workqueue);
 881 out_destroy_log:
 882         destroy_workqueue(mp->m_log_workqueue);
 883 out_destroy_reclaim:
 884         destroy_workqueue(mp->m_reclaim_workqueue);
 885 out_destroy_cil:
 886         destroy_workqueue(mp->m_cil_workqueue);
 887 out_destroy_unwritten:
 888         destroy_workqueue(mp->m_unwritten_workqueue);
 889 out_destroy_data_iodone_queue:
 890         destroy_workqueue(mp->m_data_workqueue);
 891 out_destroy_buf:
 892         destroy_workqueue(mp->m_buf_workqueue);
 893 out:
 894         return -ENOMEM;
 895 }
 896
/*
 * Destroy every workqueue that xfs_init_mount_workqueues() created for
 * this mount.
 */
STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_eofblocks_workqueue);
	destroy_workqueue(mp->m_log_workqueue);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_cil_workqueue);
	destroy_workqueue(mp->m_data_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}
 910
 911 /*
 912  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 913  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 914  * for IO to complete so that we effectively throttle multiple callers to the
 915  * rate at which IO is completing.
 916  */
 917 void
 918 xfs_flush_inodes(
 919         struct xfs_mount        *mp)
 920 {
 921         struct super_block      *sb = mp->m_super;
 922
 923         if (down_read_trylock(&sb->s_umount)) {
 924                 sync_inodes_sb(sb);
 925                 up_read(&sb->s_umount);
 926         }
 927 }
 928
/*
 * Catch misguided souls that try to use this interface on XFS.
 *
 * This hook must never be reached: it triggers BUG() unconditionally.  The
 * return statement only satisfies the function signature.
 */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}
 937
 938 #ifdef DEBUG
 939 static void
 940 xfs_check_delalloc(
 941         struct xfs_inode        *ip,
 942         int                     whichfork)
 943 {
 944         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 945         struct xfs_bmbt_irec    got;
 946         struct xfs_iext_cursor  icur;
 947
 948         if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
 949                 return;
 950         do {
 951                 if (isnullstartblock(got.br_startblock)) {
 952                         xfs_warn(ip->i_mount,
 953         "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
 954                                 ip->i_ino,
 955                                 whichfork == XFS_DATA_FORK ? "data" : "cow",
 956                                 got.br_startoff, got.br_blockcount);
 957                 }
 958         } while (xfs_iext_next_extent(ifp, &icur, &got));
 959 }
 960 #else
 961 #define xfs_check_delalloc(ip, whichfork)       do { } while (0)
 962 #endif
 963
 964 /*
 965  * Now that the generic code is guaranteed not to be accessing
 966  * the linux inode, we can inactivate and reclaim the inode.
 967  */
 968 STATIC void
 969 xfs_fs_destroy_inode(
 970         struct inode            *inode)
 971 {
 972         struct xfs_inode        *ip = XFS_I(inode);
 973
 974         trace_xfs_destroy_inode(ip);
 975
 976         ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 977         XFS_STATS_INC(ip->i_mount, vn_rele);
 978         XFS_STATS_INC(ip->i_mount, vn_remove);
 979
 980         xfs_inactive(ip);
 981
 982         if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
 983                 xfs_check_delalloc(ip, XFS_DATA_FORK);
 984                 xfs_check_delalloc(ip, XFS_COW_FORK);
 985                 ASSERT(0);
 986         }
 987
 988         XFS_STATS_INC(ip->i_mount, vn_reclaim);
 989
 990         /*
 991          * We should never get here with one of the reclaim flags already set.
 992          */
 993         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 994         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 995
 996         /*
 997          * We always use background reclaim here because even if the
 998          * inode is clean, it still may be under IO and hence we have
 999          * to take the flush lock. The background reclaim path handles
1000          * this more efficiently than we can here, so simply let background
1001          * reclaim tear down all inodes.
1002          */
1003         xfs_inode_set_reclaim_tag(ip);
1004 }
1005
1006 static void
1007 xfs_fs_dirty_inode(
1008         struct inode                    *inode,
1009         int                             flag)
1010 {
1011         struct xfs_inode                *ip = XFS_I(inode);
1012         struct xfs_mount                *mp = ip->i_mount;
1013         struct xfs_trans                *tp;
1014
1015         if (!(inode->i_sb->s_flags & SB_LAZYTIME))
1016                 return;
1017         if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
1018                 return;
1019
1020         if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
1021                 return;
1022         xfs_ilock(ip, XFS_ILOCK_EXCL);
1023         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1024         xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
1025         xfs_trans_commit(tp);
1026 }
1027
/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 *
 * @inode is the slab object, i.e. a pointer to the struct xfs_inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	/* Start from an all-zero object. */
	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

	/* ip->i_ino is zero here because of the memset above. */
	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
}
1056
1057 /*
1058  * We do an unlocked check for XFS_IDONTCACHE here because we are already
1059  * serialised against cache hits here via the inode->i_lock and igrab() in
1060  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
1061  * racing with us, and it avoids needing to grab a spinlock here for every inode
1062  * we drop the final reference on.
1063  */
1064 STATIC int
1065 xfs_fs_drop_inode(
1066         struct inode            *inode)
1067 {
1068         struct xfs_inode        *ip = XFS_I(inode);
1069
1070         /*
1071          * If this unlinked inode is in the middle of recovery, don't
1072          * drop the inode just yet; log recovery will take care of
1073          * that.  See the comment for this inode flag.
1074          */
1075         if (ip->i_flags & XFS_IRECOVERY) {
1076                 ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
1077                 return 0;
1078         }
1079
1080         return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
1081 }
1082
/*
 * Free the filesystem, realtime device and log device name strings that
 * hang off the mount structure.  The pointers themselves are not cleared.
 */
STATIC void
xfs_free_fsname(
	struct xfs_mount	*mp)
{
	kfree(mp->m_fsname);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
}
1091
1092 STATIC int
1093 xfs_fs_sync_fs(
1094         struct super_block      *sb,
1095         int                     wait)
1096 {
1097         struct xfs_mount        *mp = XFS_M(sb);
1098
1099         /*
1100          * Doing anything during the async pass would be counterproductive.
1101          */
1102         if (!wait)
1103                 return 0;
1104
1105         xfs_log_force(mp, XFS_LOG_SYNC);
1106         if (laptop_mode) {
1107                 /*
1108                  * The disk must be active because we're syncing.
1109                  * We schedule log work now (now that the disk is
1110                  * active) instead of later (when it might not be).
1111                  */
1112                 flush_delayed_work(&mp->m_log->l_work);
1113         }
1114
1115         return 0;
1116 }
1117
1118 STATIC int
1119 xfs_fs_statfs(
1120         struct dentry           *dentry,
1121         struct kstatfs          *statp)
1122 {
1123         struct xfs_mount        *mp = XFS_M(dentry->d_sb);
1124         xfs_sb_t                *sbp = &mp->m_sb;
1125         struct xfs_inode        *ip = XFS_I(d_inode(dentry));
1126         uint64_t                fakeinos, id;
1127         uint64_t                icount;
1128         uint64_t                ifree;
1129         uint64_t                fdblocks;
1130         xfs_extlen_t            lsize;
1131         int64_t                 ffree;
1132
1133         statp->f_type = XFS_SUPER_MAGIC;
1134         statp->f_namelen = MAXNAMELEN - 1;
1135
1136         id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
1137         statp->f_fsid.val[0] = (u32)id;
1138         statp->f_fsid.val[1] = (u32)(id >> 32);
1139
1140         icount = percpu_counter_sum(&mp->m_icount);
1141         ifree = percpu_counter_sum(&mp->m_ifree);
1142         fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1143
1144         spin_lock(&mp->m_sb_lock);
1145         statp->f_bsize = sbp->sb_blocksize;
1146         lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1147         statp->f_blocks = sbp->sb_dblocks - lsize;
1148         spin_unlock(&mp->m_sb_lock);
1149
1150         statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
1151         statp->f_bavail = statp->f_bfree;
1152
1153         fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
1154         statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
1155         if (mp->m_maxicount)
1156                 statp->f_files = min_t(typeof(statp->f_files),
1157                                         statp->f_files,
1158                                         mp->m_maxicount);
1159
1160         /* If sb_icount overshot maxicount, report actual allocation */
1161         statp->f_files = max_t(typeof(statp->f_files),
1162                                         statp->f_files,
1163                                         sbp->sb_icount);
1164
1165         /* make sure statp->f_ffree does not underflow */
1166         ffree = statp->f_files - (icount - ifree);
1167         statp->f_ffree = max_t(int64_t, ffree, 0);
1168
1169
1170         if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1171             ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
1172                               (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
1173                 xfs_qm_statvfs(ip, statp);
1174
1175         if (XFS_IS_REALTIME_MOUNT(mp) &&
1176             (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
1177                 statp->f_blocks = sbp->sb_rblocks;
1178                 statp->f_bavail = statp->f_bfree =
1179                         sbp->sb_frextents * sbp->sb_rextsize;
1180         }
1181
1182         return 0;
1183 }
1184
/*
 * Stash the current reserve pool size in mp->m_resblks_save, then request a
 * reserve pool of zero blocks via xfs_reserve_blocks().
 */
STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks = 0;

	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, &resblks, NULL);
}
1193
1194 STATIC void
1195 xfs_restore_resvblks(struct xfs_mount *mp)
1196 {
1197         uint64_t resblks;
1198
1199         if (mp->m_resblks_save) {
1200                 resblks = mp->m_resblks_save;
1201                 mp->m_resblks_save = 0;
1202         } else
1203                 resblks = xfs_default_resblks(mp);
1204
1205         xfs_reserve_blocks(mp, &resblks, NULL);
1206 }
1207
1208 /*
1209  * Trigger writeback of all the dirty metadata in the file system.
1210  *
1211  * This ensures that the metadata is written to their location on disk rather
1212  * than just existing in transactions in the log. This means after a quiesce
1213  * there is no log replay required to write the inodes to disk - this is the
1214  * primary difference between a sync and a quiesce.
1215  *
1216  * Note: xfs_log_quiesce() stops background log work - the callers must ensure
1217  * it is started again when appropriate.
1218  */
1219 void
1220 xfs_quiesce_attr(
1221         struct xfs_mount        *mp)
1222 {
1223         int     error = 0;
1224
1225         /* wait for all modifications to complete */
1226         while (atomic_read(&mp->m_active_trans) > 0)
1227                 delay(100);
1228
1229         /* force the log to unpin objects from the now complete transactions */
1230         xfs_log_force(mp, XFS_LOG_SYNC);
1231
1232         /* reclaim inodes to do any IO before the freeze completes */
1233         xfs_reclaim_inodes(mp, 0);
1234         xfs_reclaim_inodes(mp, SYNC_WAIT);
1235
1236         /* Push the superblock and write an unmount record */
1237         error = xfs_log_sbcount(mp);
1238         if (error)
1239                 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
1240                                 "Frozen image may not be consistent.");
1241         /*
1242          * Just warn here till VFS can correctly support
1243          * read-only remount without racing.
1244          */
1245         WARN_ON(atomic_read(&mp->m_active_trans) != 0);
1246
1247         xfs_log_quiesce(mp);
1248 }
1249
1250 STATIC int
1251 xfs_test_remount_options(
1252         struct super_block      *sb,
1253         char                    *options)
1254 {
1255         int                     error = 0;
1256         struct xfs_mount        *tmp_mp;
1257
1258         tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
1259         if (!tmp_mp)
1260                 return -ENOMEM;
1261
1262         tmp_mp->m_super = sb;
1263         error = xfs_parseargs(tmp_mp, options);
1264         xfs_free_fsname(tmp_mp);
1265         kmem_free(tmp_mp);
1266
1267         return error;
1268 }
1269
/*
 * ->remount_fs hook.
 *
 * Validates @options, applies the inode32/inode64 options (all other options
 * are silently ignored, see below) and then performs a read-only <->
 * read-write transition if the requested *flags differ from the current
 * mount state.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_fs_remount(
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
	struct xfs_mount	*mp = XFS_M(sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	substring_t		args[MAX_OPT_ARGS];
	char			*p;
	int			error;

	/* First, check for complete junk; i.e. invalid options */
	error = xfs_test_remount_options(sb, options);
	if (error)
		return error;

	sync_filesystem(sb);
	/* Walk the comma separated option string; strsep() modifies it. */
	while ((p = strsep(&options, ",")) != NULL) {
		int token;

		/* Skip empty fields such as those produced by ",,". */
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_inode64:
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		case Opt_inode32:
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		default:
			/*
			 * Logically we would return an error here to prevent
			 * users from believing they might have changed
			 * mount options using remount which can't be changed.
			 *
			 * But unfortunately mount(8) adds all options from
			 * mtab and fstab to the mount arguments in some cases
			 * so we can't blindly reject options, but have to
			 * check for each specified option if it actually
			 * differs from the currently set option and only
			 * reject it if that's the case.
			 *
			 * Until that is implemented we return success for
			 * every remount request, and silently ignore all
			 * options that we can't actually change.
			 */
#if 0
			xfs_info(mp,
		"mount option \"%s\" not supported for remount", p);
			return -EINVAL;
#else
			break;
#endif
		}
	}

	/* ro -> rw */
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
			xfs_warn(mp,
		"ro->rw transition prohibited on norecovery mount");
			return -EINVAL;
		}

		/* V5 filesystems with unknown ro-compat bits must stay ro. */
		if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
		    xfs_sb_has_ro_compat_feature(sbp,
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
			xfs_warn(mp,
"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
				(sbp->sb_features_ro_compat &
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
			return -EINVAL;
		}

		/* From here on the mount is treated as writable. */
		mp->m_flags &= ~XFS_MOUNT_RDONLY;

		/*
		 * If this is the first remount to writeable state we
		 * might have some superblock changes to update.
		 */
		if (mp->m_update_sb) {
			error = xfs_sync_sb(mp, false);
			if (error) {
				xfs_warn(mp, "failed to write sb changes");
				return error;
			}
			mp->m_update_sb = false;
		}

		/*
		 * Fill out the reserve pool if it is empty. Use the stashed
		 * value if it is non-zero, otherwise go with the default.
		 */
		xfs_restore_resvblks(mp);
		xfs_log_work_queue(mp);

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
		xfs_icache_enable_reclaim(mp);

		/* Create the per-AG metadata reservation pool .*/
		error = xfs_fs_reserve_ag_blocks(mp);
		/* -ENOSPC from the reservation is tolerated here. */
		if (error && error != -ENOSPC)
			return error;
	}

	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
		/*
		 * Cancel background eofb scanning so it cannot race with the
		 * final log force+buftarg wait and deadlock the remount.
		 */
		xfs_icache_disable_reclaim(mp);

		/* Get rid of any leftover CoW reservations... */
		error = xfs_icache_free_cowblocks(mp, NULL);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/* Free the per-AG metadata reservation pool. */
		error = xfs_fs_unreserve_ag_blocks(mp);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/*
		 * Before we sync the metadata, we need to free up the reserve
		 * block pool so that the used block count in the superblock on
		 * disk is correct at the end of the remount. Stash the current
		 * reserve pool size so that if we get remounted rw, we can
		 * return it to the same size.
		 */
		xfs_save_resvblks(mp);

		/* Only flag the mount read-only once it has been quiesced. */
		xfs_quiesce_attr(mp);
		mp->m_flags |= XFS_MOUNT_RDONLY;
	}

	return 0;
}
1424
/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 *
 * Returns the result of xfs_sync_sb().
 */
STATIC int
xfs_fs_freeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_icache_disable_reclaim(mp);
	/* Stash and empty the reserve pool before quiescing the metadata. */
	xfs_save_resvblks(mp);
	xfs_quiesce_attr(mp);
	return xfs_sync_sb(mp, true);
}
1442
/*
 * Undo xfs_fs_freeze(): restore the reserve pool, requeue the log work and
 * re-enable inode cache reclaim.  Always returns 0.
 */
STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_icache_enable_reclaim(mp);
	return 0;
}
1454
1455 STATIC int
1456 xfs_fs_show_options(
1457         struct seq_file         *m,
1458         struct dentry           *root)
1459 {
1460         return xfs_showargs(XFS_M(root->d_sb), m);
1461 }
1462
1463 /*
1464  * This function fills in xfs_mount_t fields based on mount args.
1465  * Note: the superblock _has_ now been read in.
1466  */
1467 STATIC int
1468 xfs_finish_flags(
1469         struct xfs_mount        *mp)
1470 {
1471         int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1472
1473         /* Fail a mount where the logbuf is smaller than the log stripe */
1474         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1475                 if (mp->m_logbsize <= 0 &&
1476                     mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1477                         mp->m_logbsize = mp->m_sb.sb_logsunit;
1478                 } else if (mp->m_logbsize > 0 &&
1479                            mp->m_logbsize < mp->m_sb.sb_logsunit) {
1480                         xfs_warn(mp,
1481                 "logbuf size must be greater than or equal to log stripe size");
1482                         return -EINVAL;
1483                 }
1484         } else {
1485                 /* Fail a mount if the logbuf is larger than 32K */
1486                 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1487                         xfs_warn(mp,
1488                 "logbuf size for version 1 logs must be 16K or 32K");
1489                         return -EINVAL;
1490                 }
1491         }
1492
1493         /*
1494          * V5 filesystems always use attr2 format for attributes.
1495          */
1496         if (xfs_sb_version_hascrc(&mp->m_sb) &&
1497             (mp->m_flags & XFS_MOUNT_NOATTR2)) {
1498                 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1499                              "attr2 is always enabled for V5 filesystems.");
1500                 return -EINVAL;
1501         }
1502
1503         /*
1504          * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1505          * told by noattr2 to turn it off
1506          */
1507         if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1508             !(mp->m_flags & XFS_MOUNT_NOATTR2))
1509                 mp->m_flags |= XFS_MOUNT_ATTR2;
1510
1511         /*
1512          * prohibit r/w mounts of read-only filesystems
1513          */
1514         if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1515                 xfs_warn(mp,
1516                         "cannot mount a read-only filesystem as read-write");
1517                 return -EROFS;
1518         }
1519
1520         if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
1521             (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
1522             !xfs_sb_version_has_pquotino(&mp->m_sb)) {
1523                 xfs_warn(mp,
1524                   "Super block does not support project and group quota together");
1525                 return -EINVAL;
1526         }
1527
1528         return 0;
1529 }
1530
1531 static int
1532 xfs_init_percpu_counters(
1533         struct xfs_mount        *mp)
1534 {
1535         int             error;
1536
1537         error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1538         if (error)
1539                 return -ENOMEM;
1540
1541         error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1542         if (error)
1543                 goto free_icount;
1544
1545         error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1546         if (error)
1547                 goto free_ifree;
1548
1549         return 0;
1550
1551 free_ifree:
1552         percpu_counter_destroy(&mp->m_ifree);
1553 free_icount:
1554         percpu_counter_destroy(&mp->m_icount);
1555         return -ENOMEM;
1556 }
1557
1558 void
1559 xfs_reinit_percpu_counters(
1560         struct xfs_mount        *mp)
1561 {
1562         percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1563         percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1564         percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1565 }
1566
/*
 * Tear down the three per-cpu counters set up by
 * xfs_init_percpu_counters().
 */
static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
}
1575
1576 static struct xfs_mount *
1577 xfs_mount_alloc(
1578         struct super_block      *sb)
1579 {
1580         struct xfs_mount        *mp;
1581
1582         mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1583         if (!mp)
1584                 return NULL;
1585
1586         mp->m_super = sb;
1587         spin_lock_init(&mp->m_sb_lock);
1588         spin_lock_init(&mp->m_agirotor_lock);
1589         INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1590         spin_lock_init(&mp->m_perag_lock);
1591         mutex_init(&mp->m_growlock);
1592         atomic_set(&mp->m_active_trans, 0);
1593         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1594         INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1595         INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
1596         mp->m_kobj.kobject.kset = xfs_kset;
1597         /*
1598          * We don't create the finobt per-ag space reservation until after log
1599          * recovery, so we must set this to true so that an ifree transaction
1600          * started during log recovery will not depend on space reservations
1601          * for finobt expansion.
1602          */
1603         mp->m_finobt_nores = true;
1604         return mp;
1605 }
1606
1607
1608 STATIC int
1609 xfs_fs_fill_super(
1610         struct super_block      *sb,
1611         void                    *data,
1612         int                     silent)
1613 {
1614         struct inode            *root;
1615         struct xfs_mount        *mp = NULL;
1616         int                     flags = 0, error = -ENOMEM;
1617
1618         /*
1619          * allocate mp and do all low-level struct initializations before we
1620          * attach it to the super
1621          */
1622         mp = xfs_mount_alloc(sb);
1623         if (!mp)
1624                 goto out;
1625         sb->s_fs_info = mp;
1626
1627         error = xfs_parseargs(mp, (char *)data);
1628         if (error)
1629                 goto out_free_fsname;
1630
1631         sb_min_blocksize(sb, BBSIZE);
1632         sb->s_xattr = xfs_xattr_handlers;
1633         sb->s_export_op = &xfs_export_operations;
1634 #ifdef CONFIG_XFS_QUOTA
1635         sb->s_qcop = &xfs_quotactl_operations;
1636         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1637 #endif
1638         sb->s_op = &xfs_super_operations;
1639
1640         /*
1641          * Delay mount work if the debug hook is set. This is debug
1642          * instrumention to coordinate simulation of xfs mount failures with
1643          * VFS superblock operations
1644          */
1645         if (xfs_globals.mount_delay) {
1646                 xfs_notice(mp, "Delaying mount for %d seconds.",
1647                         xfs_globals.mount_delay);
1648                 msleep(xfs_globals.mount_delay * 1000);
1649         }
1650
1651         if (silent)
1652                 flags |= XFS_MFSI_QUIET;
1653
1654         error = xfs_open_devices(mp);
1655         if (error)
1656                 goto out_free_fsname;
1657
1658         error = xfs_init_mount_workqueues(mp);
1659         if (error)
1660                 goto out_close_devices;
1661
1662         error = xfs_init_percpu_counters(mp);
1663         if (error)
1664                 goto out_destroy_workqueues;
1665
1666         /* Allocate stats memory before we do operations that might use it */
1667         mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1668         if (!mp->m_stats.xs_stats) {
1669                 error = -ENOMEM;
1670                 goto out_destroy_counters;
1671         }
1672
1673         error = xfs_readsb(mp, flags);
1674         if (error)
1675                 goto out_free_stats;
1676
1677         error = xfs_finish_flags(mp);
1678         if (error)
1679                 goto out_free_sb;
1680
1681         error = xfs_setup_devices(mp);
1682         if (error)
1683                 goto out_free_sb;
1684
1685         error = xfs_filestream_mount(mp);
1686         if (error)
1687                 goto out_free_sb;
1688
1689         /*
1690          * we must configure the block size in the superblock before we run the
1691          * full mount process as the mount process can lookup and cache inodes.
1692          */
1693         sb->s_magic = XFS_SUPER_MAGIC;
1694         sb->s_blocksize = mp->m_sb.sb_blocksize;
1695         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1696         sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1697         sb->s_max_links = XFS_MAXLINK;
1698         sb->s_time_gran = 1;
1699         set_posix_acl_flag(sb);
1700
1701         /* version 5 superblocks support inode version counters. */
1702         if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1703                 sb->s_flags |= SB_I_VERSION;
1704
1705         if (mp->m_flags & XFS_MOUNT_DAX) {
1706                 bool rtdev_is_dax = false, datadev_is_dax;
1707
1708                 xfs_warn(mp,
1709                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1710
1711                 datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
1712                         sb->s_blocksize);
1713                 if (mp->m_rtdev_targp)
1714                         rtdev_is_dax = bdev_dax_supported(
1715                                 mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
1716                 if (!rtdev_is_dax && !datadev_is_dax) {
1717                         xfs_alert(mp,
1718                         "DAX unsupported by block device. Turning off DAX.");
1719                         mp->m_flags &= ~XFS_MOUNT_DAX;
1720                 }
1721                 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1722                         xfs_alert(mp,
1723                 "DAX and reflink cannot be used together!");
1724                         error = -EINVAL;
1725                         goto out_filestream_unmount;
1726                 }
1727         }
1728
1729         if (mp->m_flags & XFS_MOUNT_DISCARD) {
1730                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
1731
1732                 if (!blk_queue_discard(q)) {
1733                         xfs_warn(mp, "mounting with \"discard\" option, but "
1734                                         "the device does not support discard");
1735                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
1736                 }
1737         }
1738
1739         if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1740                 if (mp->m_sb.sb_rblocks) {
1741                         xfs_alert(mp,
1742         "reflink not compatible with realtime device!");
1743                         error = -EINVAL;
1744                         goto out_filestream_unmount;
1745                 }
1746
1747                 if (xfs_globals.always_cow) {
1748                         xfs_info(mp, "using DEBUG-only always_cow mode.");
1749                         mp->m_always_cow = true;
1750                 }
1751         }
1752
1753         if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1754                 xfs_alert(mp,
1755         "reverse mapping btree not compatible with realtime device!");
1756                 error = -EINVAL;
1757                 goto out_filestream_unmount;
1758         }
1759
1760         error = xfs_mountfs(mp);
1761         if (error)
1762                 goto out_filestream_unmount;
1763
1764         root = igrab(VFS_I(mp->m_rootip));
1765         if (!root) {
1766                 error = -ENOENT;
1767                 goto out_unmount;
1768         }
1769         sb->s_root = d_make_root(root);
1770         if (!sb->s_root) {
1771                 error = -ENOMEM;
1772                 goto out_unmount;
1773         }
1774
1775         return 0;
1776
1777  out_filestream_unmount:
1778         xfs_filestream_unmount(mp);
1779  out_free_sb:
1780         xfs_freesb(mp);
1781  out_free_stats:
1782         free_percpu(mp->m_stats.xs_stats);
1783  out_destroy_counters:
1784         xfs_destroy_percpu_counters(mp);
1785  out_destroy_workqueues:
1786         xfs_destroy_mount_workqueues(mp);
1787  out_close_devices:
1788         xfs_close_devices(mp);
1789  out_free_fsname:
1790         sb->s_fs_info = NULL;
1791         xfs_free_fsname(mp);
1792         kfree(mp);
1793  out:
1794         return error;
1795
1796  out_unmount:
1797         xfs_filestream_unmount(mp);
1798         xfs_unmountfs(mp);
1799         goto out_free_sb;
1800 }
1801
1802 STATIC void
1803 xfs_fs_put_super(
1804         struct super_block      *sb)
1805 {
1806         struct xfs_mount        *mp = XFS_M(sb);
1807
1808         /* if ->fill_super failed, we have no mount to tear down */
1809         if (!sb->s_fs_info)
1810                 return;
1811
1812         xfs_notice(mp, "Unmounting Filesystem");
1813         xfs_filestream_unmount(mp);
1814         xfs_unmountfs(mp);
1815
1816         xfs_freesb(mp);
1817         free_percpu(mp->m_stats.xs_stats);
1818         xfs_destroy_percpu_counters(mp);
1819         xfs_destroy_mount_workqueues(mp);
1820         xfs_close_devices(mp);
1821
1822         sb->s_fs_info = NULL;
1823         xfs_free_fsname(mp);
1824         kfree(mp);
1825 }
1826
1827 STATIC struct dentry *
1828 xfs_fs_mount(
1829         struct file_system_type *fs_type,
1830         int                     flags,
1831         const char              *dev_name,
1832         void                    *data)
1833 {
1834         return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1835 }
1836
1837 static long
1838 xfs_fs_nr_cached_objects(
1839         struct super_block      *sb,
1840         struct shrink_control   *sc)
1841 {
1842         /* Paranoia: catch incorrect calls during mount setup or teardown */
1843         if (WARN_ON_ONCE(!sb->s_fs_info))
1844                 return 0;
1845         return xfs_reclaim_inodes_count(XFS_M(sb));
1846 }
1847
1848 static long
1849 xfs_fs_free_cached_objects(
1850         struct super_block      *sb,
1851         struct shrink_control   *sc)
1852 {
1853         return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1854 }
1855
1856 static const struct super_operations xfs_super_operations = {
1857         .alloc_inode            = xfs_fs_alloc_inode,
1858         .destroy_inode          = xfs_fs_destroy_inode,
1859         .dirty_inode            = xfs_fs_dirty_inode,
1860         .drop_inode             = xfs_fs_drop_inode,
1861         .put_super              = xfs_fs_put_super,
1862         .sync_fs                = xfs_fs_sync_fs,
1863         .freeze_fs              = xfs_fs_freeze,
1864         .unfreeze_fs            = xfs_fs_unfreeze,
1865         .statfs                 = xfs_fs_statfs,
1866         .remount_fs             = xfs_fs_remount,
1867         .show_options           = xfs_fs_show_options,
1868         .nr_cached_objects      = xfs_fs_nr_cached_objects,
1869         .free_cached_objects    = xfs_fs_free_cached_objects,
1870 };
1871
1872 static struct file_system_type xfs_fs_type = {
1873         .owner                  = THIS_MODULE,
1874         .name                   = "xfs",
1875         .mount                  = xfs_fs_mount,
1876         .kill_sb                = kill_block_super,
1877         .fs_flags               = FS_REQUIRES_DEV,
1878 };
1879 MODULE_ALIAS_FS("xfs");
1880
1881 STATIC int __init
1882 xfs_init_zones(void)
1883 {
1884         if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
1885                         offsetof(struct xfs_ioend, io_inline_bio),
1886                         BIOSET_NEED_BVECS))
1887                 goto out;
1888
1889         xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
1890                                                 "xfs_log_ticket");
1891         if (!xfs_log_ticket_zone)
1892                 goto out_free_ioend_bioset;
1893
1894         xfs_bmap_free_item_zone = kmem_zone_init(
1895                         sizeof(struct xfs_extent_free_item),
1896                         "xfs_bmap_free_item");
1897         if (!xfs_bmap_free_item_zone)
1898                 goto out_destroy_log_ticket_zone;
1899
1900         xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1901                                                 "xfs_btree_cur");
1902         if (!xfs_btree_cur_zone)
1903                 goto out_destroy_bmap_free_item_zone;
1904
1905         xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
1906                                                 "xfs_da_state");
1907         if (!xfs_da_state_zone)
1908                 goto out_destroy_btree_cur_zone;
1909
1910         xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
1911         if (!xfs_ifork_zone)
1912                 goto out_destroy_da_state_zone;
1913
1914         xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
1915         if (!xfs_trans_zone)
1916                 goto out_destroy_ifork_zone;
1917
1918
1919         /*
1920          * The size of the zone allocated buf log item is the maximum
1921          * size possible under XFS.  This wastes a little bit of memory,
1922          * but it is much faster.
1923          */
1924         xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
1925                                            "xfs_buf_item");
1926         if (!xfs_buf_item_zone)
1927                 goto out_destroy_trans_zone;
1928
1929         xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1930                         ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
1931                                  sizeof(xfs_extent_t))), "xfs_efd_item");
1932         if (!xfs_efd_zone)
1933                 goto out_destroy_buf_item_zone;
1934
1935         xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
1936                         ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
1937                                 sizeof(xfs_extent_t))), "xfs_efi_item");
1938         if (!xfs_efi_zone)
1939                 goto out_destroy_efd_zone;
1940
1941         xfs_inode_zone =
1942                 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
1943                         KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
1944                         KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
1945         if (!xfs_inode_zone)
1946                 goto out_destroy_efi_zone;
1947
1948         xfs_ili_zone =
1949                 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
1950                                         KM_ZONE_SPREAD, NULL);
1951         if (!xfs_ili_zone)
1952                 goto out_destroy_inode_zone;
1953         xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
1954                                         "xfs_icr");
1955         if (!xfs_icreate_zone)
1956                 goto out_destroy_ili_zone;
1957
1958         xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
1959                         "xfs_rud_item");
1960         if (!xfs_rud_zone)
1961                 goto out_destroy_icreate_zone;
1962
1963         xfs_rui_zone = kmem_zone_init(
1964                         xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
1965                         "xfs_rui_item");
1966         if (!xfs_rui_zone)
1967                 goto out_destroy_rud_zone;
1968
1969         xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
1970                         "xfs_cud_item");
1971         if (!xfs_cud_zone)
1972                 goto out_destroy_rui_zone;
1973
1974         xfs_cui_zone = kmem_zone_init(
1975                         xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
1976                         "xfs_cui_item");
1977         if (!xfs_cui_zone)
1978                 goto out_destroy_cud_zone;
1979
1980         xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
1981                         "xfs_bud_item");
1982         if (!xfs_bud_zone)
1983                 goto out_destroy_cui_zone;
1984
1985         xfs_bui_zone = kmem_zone_init(
1986                         xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
1987                         "xfs_bui_item");
1988         if (!xfs_bui_zone)
1989                 goto out_destroy_bud_zone;
1990
1991         return 0;
1992
1993  out_destroy_bud_zone:
1994         kmem_zone_destroy(xfs_bud_zone);
1995  out_destroy_cui_zone:
1996         kmem_zone_destroy(xfs_cui_zone);
1997  out_destroy_cud_zone:
1998         kmem_zone_destroy(xfs_cud_zone);
1999  out_destroy_rui_zone:
2000         kmem_zone_destroy(xfs_rui_zone);
2001  out_destroy_rud_zone:
2002         kmem_zone_destroy(xfs_rud_zone);
2003  out_destroy_icreate_zone:
2004         kmem_zone_destroy(xfs_icreate_zone);
2005  out_destroy_ili_zone:
2006         kmem_zone_destroy(xfs_ili_zone);
2007  out_destroy_inode_zone:
2008         kmem_zone_destroy(xfs_inode_zone);
2009  out_destroy_efi_zone:
2010         kmem_zone_destroy(xfs_efi_zone);
2011  out_destroy_efd_zone:
2012         kmem_zone_destroy(xfs_efd_zone);
2013  out_destroy_buf_item_zone:
2014         kmem_zone_destroy(xfs_buf_item_zone);
2015  out_destroy_trans_zone:
2016         kmem_zone_destroy(xfs_trans_zone);
2017  out_destroy_ifork_zone:
2018         kmem_zone_destroy(xfs_ifork_zone);
2019  out_destroy_da_state_zone:
2020         kmem_zone_destroy(xfs_da_state_zone);
2021  out_destroy_btree_cur_zone:
2022         kmem_zone_destroy(xfs_btree_cur_zone);
2023  out_destroy_bmap_free_item_zone:
2024         kmem_zone_destroy(xfs_bmap_free_item_zone);
2025  out_destroy_log_ticket_zone:
2026         kmem_zone_destroy(xfs_log_ticket_zone);
2027  out_free_ioend_bioset:
2028         bioset_exit(&xfs_ioend_bioset);
2029  out:
2030         return -ENOMEM;
2031 }
2032
2033 STATIC void
2034 xfs_destroy_zones(void)
2035 {
2036         /*
2037          * Make sure all delayed rcu free are flushed before we
2038          * destroy caches.
2039          */
2040         rcu_barrier();
2041         kmem_zone_destroy(xfs_bui_zone);
2042         kmem_zone_destroy(xfs_bud_zone);
2043         kmem_zone_destroy(xfs_cui_zone);
2044         kmem_zone_destroy(xfs_cud_zone);
2045         kmem_zone_destroy(xfs_rui_zone);
2046         kmem_zone_destroy(xfs_rud_zone);
2047         kmem_zone_destroy(xfs_icreate_zone);
2048         kmem_zone_destroy(xfs_ili_zone);
2049         kmem_zone_destroy(xfs_inode_zone);
2050         kmem_zone_destroy(xfs_efi_zone);
2051         kmem_zone_destroy(xfs_efd_zone);
2052         kmem_zone_destroy(xfs_buf_item_zone);
2053         kmem_zone_destroy(xfs_trans_zone);
2054         kmem_zone_destroy(xfs_ifork_zone);
2055         kmem_zone_destroy(xfs_da_state_zone);
2056         kmem_zone_destroy(xfs_btree_cur_zone);
2057         kmem_zone_destroy(xfs_bmap_free_item_zone);
2058         kmem_zone_destroy(xfs_log_ticket_zone);
2059         bioset_exit(&xfs_ioend_bioset);
2060 }
2061
2062 STATIC int __init
2063 xfs_init_workqueues(void)
2064 {
2065         /*
2066          * The allocation workqueue can be used in memory reclaim situations
2067          * (writepage path), and parallelism is only limited by the number of
2068          * AGs in all the filesystems mounted. Hence use the default large
2069          * max_active value for this workqueue.
2070          */
2071         xfs_alloc_wq = alloc_workqueue("xfsalloc",
2072                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2073         if (!xfs_alloc_wq)
2074                 return -ENOMEM;
2075
2076         xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2077         if (!xfs_discard_wq)
2078                 goto out_free_alloc_wq;
2079
2080         return 0;
2081 out_free_alloc_wq:
2082         destroy_workqueue(xfs_alloc_wq);
2083         return -ENOMEM;
2084 }
2085
2086 STATIC void
2087 xfs_destroy_workqueues(void)
2088 {
2089         destroy_workqueue(xfs_discard_wq);
2090         destroy_workqueue(xfs_alloc_wq);
2091 }
2092
2093 STATIC int __init
2094 init_xfs_fs(void)
2095 {
2096         int                     error;
2097
2098         xfs_check_ondisk_structs();
2099
2100         printk(KERN_INFO XFS_VERSION_STRING " with "
2101                          XFS_BUILD_OPTIONS " enabled\n");
2102
2103         xfs_dir_startup();
2104
2105         error = xfs_init_zones();
2106         if (error)
2107                 goto out;
2108
2109         error = xfs_init_workqueues();
2110         if (error)
2111                 goto out_destroy_zones;
2112
2113         error = xfs_mru_cache_init();
2114         if (error)
2115                 goto out_destroy_wq;
2116
2117         error = xfs_buf_init();
2118         if (error)
2119                 goto out_mru_cache_uninit;
2120
2121         error = xfs_init_procfs();
2122         if (error)
2123                 goto out_buf_terminate;
2124
2125         error = xfs_sysctl_register();
2126         if (error)
2127                 goto out_cleanup_procfs;
2128
2129         xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2130         if (!xfs_kset) {
2131                 error = -ENOMEM;
2132                 goto out_sysctl_unregister;
2133         }
2134
2135         xfsstats.xs_kobj.kobject.kset = xfs_kset;
2136
2137         xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2138         if (!xfsstats.xs_stats) {
2139                 error = -ENOMEM;
2140                 goto out_kset_unregister;
2141         }
2142
2143         error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2144                                "stats");
2145         if (error)
2146                 goto out_free_stats;
2147
2148 #ifdef DEBUG
2149         xfs_dbg_kobj.kobject.kset = xfs_kset;
2150         error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2151         if (error)
2152                 goto out_remove_stats_kobj;
2153 #endif
2154
2155         error = xfs_qm_init();
2156         if (error)
2157                 goto out_remove_dbg_kobj;
2158
2159         error = register_filesystem(&xfs_fs_type);
2160         if (error)
2161                 goto out_qm_exit;
2162         return 0;
2163
2164  out_qm_exit:
2165         xfs_qm_exit();
2166  out_remove_dbg_kobj:
2167 #ifdef DEBUG
2168         xfs_sysfs_del(&xfs_dbg_kobj);
2169  out_remove_stats_kobj:
2170 #endif
2171         xfs_sysfs_del(&xfsstats.xs_kobj);
2172  out_free_stats:
2173         free_percpu(xfsstats.xs_stats);
2174  out_kset_unregister:
2175         kset_unregister(xfs_kset);
2176  out_sysctl_unregister:
2177         xfs_sysctl_unregister();
2178  out_cleanup_procfs:
2179         xfs_cleanup_procfs();
2180  out_buf_terminate:
2181         xfs_buf_terminate();
2182  out_mru_cache_uninit:
2183         xfs_mru_cache_uninit();
2184  out_destroy_wq:
2185         xfs_destroy_workqueues();
2186  out_destroy_zones:
2187         xfs_destroy_zones();
2188  out:
2189         return error;
2190 }
2191
2192 STATIC void __exit
2193 exit_xfs_fs(void)
2194 {
2195         xfs_qm_exit();
2196         unregister_filesystem(&xfs_fs_type);
2197 #ifdef DEBUG
2198         xfs_sysfs_del(&xfs_dbg_kobj);
2199 #endif
2200         xfs_sysfs_del(&xfsstats.xs_kobj);
2201         free_percpu(xfsstats.xs_stats);
2202         kset_unregister(xfs_kset);
2203         xfs_sysctl_unregister();
2204         xfs_cleanup_procfs();
2205         xfs_buf_terminate();
2206         xfs_mru_cache_uninit();
2207         xfs_destroy_workqueues();
2208         xfs_destroy_zones();
2209         xfs_uuid_table_free();
2210 }
2211
2212 module_init(init_xfs_fs);
2213 module_exit(exit_xfs_fs);
2214
2215 MODULE_AUTHOR("Silicon Graphics, Inc.");
2216 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2217 MODULE_LICENSE("GPL");