Merge branch 'core/speculation' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / fs / xfs / xfs_super.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_da_format.h"
15 #include "xfs_inode.h"
16 #include "xfs_btree.h"
17 #include "xfs_bmap.h"
18 #include "xfs_alloc.h"
19 #include "xfs_error.h"
20 #include "xfs_fsops.h"
21 #include "xfs_trans.h"
22 #include "xfs_buf_item.h"
23 #include "xfs_log.h"
24 #include "xfs_log_priv.h"
25 #include "xfs_da_btree.h"
26 #include "xfs_dir2.h"
27 #include "xfs_extfree_item.h"
28 #include "xfs_mru_cache.h"
29 #include "xfs_inode_item.h"
30 #include "xfs_icache.h"
31 #include "xfs_trace.h"
32 #include "xfs_icreate_item.h"
33 #include "xfs_filestream.h"
34 #include "xfs_quota.h"
35 #include "xfs_sysfs.h"
36 #include "xfs_ondisk.h"
37 #include "xfs_rmap_item.h"
38 #include "xfs_refcount_item.h"
39 #include "xfs_bmap_item.h"
40 #include "xfs_reflink.h"
41 #include "xfs_defer.h"
42
43 #include <linux/namei.h>
44 #include <linux/dax.h>
45 #include <linux/init.h>
46 #include <linux/slab.h>
47 #include <linux/magic.h>
48 #include <linux/mount.h>
49 #include <linux/mempool.h>
50 #include <linux/writeback.h>
51 #include <linux/kthread.h>
52 #include <linux/freezer.h>
53 #include <linux/parser.h>
54
55 static const struct super_operations xfs_super_operations;
56 struct bio_set xfs_ioend_bioset;
57
58 static struct kset *xfs_kset;           /* top-level xfs sysfs dir */
59 #ifdef DEBUG
60 static struct xfs_kobj xfs_dbg_kobj;    /* global debug sysfs attrs */
61 #endif
62
63 /*
64  * Table driven mount option parser.
65  */
enum {
	/* log sizing and external devices */
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
	/* sync behaviour, stripe geometry, uuid handling, mount point */
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth,
	Opt_nouuid, Opt_mtpt,
	/* group-ID inheritance */
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	/* allocation size and log recovery */
	Opt_allocsize, Opt_norecovery,
	/* inode numbering and inode cluster retention */
	Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep,
	/* stat() I/O size reporting, attribute format, filestreams */
	Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams,
	/* quota accounting with enforcement */
	Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
	Opt_uquota, Opt_gquota, Opt_pquota,
	/* quota accounting without enforcement */
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	/* discard and DAX */
	Opt_discard, Opt_nodiscard, Opt_dax,
	/* table terminator / "no match" token */
	Opt_err,
};
77
78 static const match_table_t tokens = {
79         {Opt_logbufs,   "logbufs=%u"},  /* number of XFS log buffers */
80         {Opt_logbsize,  "logbsize=%s"}, /* size of XFS log buffers */
81         {Opt_logdev,    "logdev=%s"},   /* log device */
82         {Opt_rtdev,     "rtdev=%s"},    /* realtime I/O device */
83         {Opt_biosize,   "biosize=%u"},  /* log2 of preferred buffered io size */
84         {Opt_wsync,     "wsync"},       /* safe-mode nfs compatible mount */
85         {Opt_noalign,   "noalign"},     /* turn off stripe alignment */
86         {Opt_swalloc,   "swalloc"},     /* turn on stripe width allocation */
87         {Opt_sunit,     "sunit=%u"},    /* data volume stripe unit */
88         {Opt_swidth,    "swidth=%u"},   /* data volume stripe width */
89         {Opt_nouuid,    "nouuid"},      /* ignore filesystem UUID */
90         {Opt_mtpt,      "mtpt"},        /* filesystem mount point */
91         {Opt_grpid,     "grpid"},       /* group-ID from parent directory */
92         {Opt_nogrpid,   "nogrpid"},     /* group-ID from current process */
93         {Opt_bsdgroups, "bsdgroups"},   /* group-ID from parent directory */
94         {Opt_sysvgroups,"sysvgroups"},  /* group-ID from current process */
95         {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
96         {Opt_norecovery,"norecovery"},  /* don't run XFS recovery */
97         {Opt_inode64,   "inode64"},     /* inodes can be allocated anywhere */
98         {Opt_inode32,   "inode32"},     /* inode allocation limited to
99                                          * XFS_MAXINUMBER_32 */
100         {Opt_ikeep,     "ikeep"},       /* do not free empty inode clusters */
101         {Opt_noikeep,   "noikeep"},     /* free empty inode clusters */
102         {Opt_largeio,   "largeio"},     /* report large I/O sizes in stat() */
103         {Opt_nolargeio, "nolargeio"},   /* do not report large I/O sizes
104                                          * in stat(). */
105         {Opt_attr2,     "attr2"},       /* do use attr2 attribute format */
106         {Opt_noattr2,   "noattr2"},     /* do not use attr2 attribute format */
107         {Opt_filestreams,"filestreams"},/* use filestreams allocator */
108         {Opt_quota,     "quota"},       /* disk quotas (user) */
109         {Opt_noquota,   "noquota"},     /* no quotas */
110         {Opt_usrquota,  "usrquota"},    /* user quota enabled */
111         {Opt_grpquota,  "grpquota"},    /* group quota enabled */
112         {Opt_prjquota,  "prjquota"},    /* project quota enabled */
113         {Opt_uquota,    "uquota"},      /* user quota (IRIX variant) */
114         {Opt_gquota,    "gquota"},      /* group quota (IRIX variant) */
115         {Opt_pquota,    "pquota"},      /* project quota (IRIX variant) */
116         {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
117         {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
118         {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
119         {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
120         {Opt_discard,   "discard"},     /* Discard unused blocks */
121         {Opt_nodiscard, "nodiscard"},   /* Do not discard unused blocks */
122         {Opt_dax,       "dax"},         /* Enable direct access to bdev pages */
123         {Opt_err,       NULL},
124 };
125
126
127 STATIC int
128 suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
129 {
130         int     last, shift_left_factor = 0, _res;
131         char    *value;
132         int     ret = 0;
133
134         value = match_strdup(s);
135         if (!value)
136                 return -ENOMEM;
137
138         last = strlen(value) - 1;
139         if (value[last] == 'K' || value[last] == 'k') {
140                 shift_left_factor = 10;
141                 value[last] = '\0';
142         }
143         if (value[last] == 'M' || value[last] == 'm') {
144                 shift_left_factor = 20;
145                 value[last] = '\0';
146         }
147         if (value[last] == 'G' || value[last] == 'g') {
148                 shift_left_factor = 30;
149                 value[last] = '\0';
150         }
151
152         if (kstrtoint(value, base, &_res))
153                 ret = -EINVAL;
154         kfree(value);
155         *res = _res << shift_left_factor;
156         return ret;
157 }
158
159 /*
160  * This function fills in xfs_mount_t fields based on mount args.
161  * Note: the superblock has _not_ yet been read in.
162  *
163  * Note that this function leaks the various device name allocations on
164  * failure.  The caller takes care of them.
165  *
166  * *sb is const because this is also used to test options on the remount
167  * path, and we don't want this to have any side effects at remount time.
168  * Today this function does not change *sb, but just to future-proof...
169  */
170 STATIC int
171 xfs_parseargs(
172         struct xfs_mount        *mp,
173         char                    *options)
174 {
175         const struct super_block *sb = mp->m_super;
176         char                    *p;
177         substring_t             args[MAX_OPT_ARGS];
178         int                     dsunit = 0;
179         int                     dswidth = 0;
180         int                     iosize = 0;
181         uint8_t                 iosizelog = 0;
182
183         /*
184          * set up the mount name first so all the errors will refer to the
185          * correct device.
186          */
187         mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
188         if (!mp->m_fsname)
189                 return -ENOMEM;
190         mp->m_fsname_len = strlen(mp->m_fsname) + 1;
191
192         /*
193          * Copy binary VFS mount flags we are interested in.
194          */
195         if (sb_rdonly(sb))
196                 mp->m_flags |= XFS_MOUNT_RDONLY;
197         if (sb->s_flags & SB_DIRSYNC)
198                 mp->m_flags |= XFS_MOUNT_DIRSYNC;
199         if (sb->s_flags & SB_SYNCHRONOUS)
200                 mp->m_flags |= XFS_MOUNT_WSYNC;
201
202         /*
203          * Set some default flags that could be cleared by the mount option
204          * parsing.
205          */
206         mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
207
208         /*
209          * These can be overridden by the mount option parsing.
210          */
211         mp->m_logbufs = -1;
212         mp->m_logbsize = -1;
213
214         if (!options)
215                 goto done;
216
217         while ((p = strsep(&options, ",")) != NULL) {
218                 int             token;
219
220                 if (!*p)
221                         continue;
222
223                 token = match_token(p, tokens, args);
224                 switch (token) {
225                 case Opt_logbufs:
226                         if (match_int(args, &mp->m_logbufs))
227                                 return -EINVAL;
228                         break;
229                 case Opt_logbsize:
230                         if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
231                                 return -EINVAL;
232                         break;
233                 case Opt_logdev:
234                         kfree(mp->m_logname);
235                         mp->m_logname = match_strdup(args);
236                         if (!mp->m_logname)
237                                 return -ENOMEM;
238                         break;
239                 case Opt_mtpt:
240                         xfs_warn(mp, "%s option not allowed on this system", p);
241                         return -EINVAL;
242                 case Opt_rtdev:
243                         kfree(mp->m_rtname);
244                         mp->m_rtname = match_strdup(args);
245                         if (!mp->m_rtname)
246                                 return -ENOMEM;
247                         break;
248                 case Opt_allocsize:
249                 case Opt_biosize:
250                         if (suffix_kstrtoint(args, 10, &iosize))
251                                 return -EINVAL;
252                         iosizelog = ffs(iosize) - 1;
253                         break;
254                 case Opt_grpid:
255                 case Opt_bsdgroups:
256                         mp->m_flags |= XFS_MOUNT_GRPID;
257                         break;
258                 case Opt_nogrpid:
259                 case Opt_sysvgroups:
260                         mp->m_flags &= ~XFS_MOUNT_GRPID;
261                         break;
262                 case Opt_wsync:
263                         mp->m_flags |= XFS_MOUNT_WSYNC;
264                         break;
265                 case Opt_norecovery:
266                         mp->m_flags |= XFS_MOUNT_NORECOVERY;
267                         break;
268                 case Opt_noalign:
269                         mp->m_flags |= XFS_MOUNT_NOALIGN;
270                         break;
271                 case Opt_swalloc:
272                         mp->m_flags |= XFS_MOUNT_SWALLOC;
273                         break;
274                 case Opt_sunit:
275                         if (match_int(args, &dsunit))
276                                 return -EINVAL;
277                         break;
278                 case Opt_swidth:
279                         if (match_int(args, &dswidth))
280                                 return -EINVAL;
281                         break;
282                 case Opt_inode32:
283                         mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
284                         break;
285                 case Opt_inode64:
286                         mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
287                         break;
288                 case Opt_nouuid:
289                         mp->m_flags |= XFS_MOUNT_NOUUID;
290                         break;
291                 case Opt_ikeep:
292                         mp->m_flags |= XFS_MOUNT_IKEEP;
293                         break;
294                 case Opt_noikeep:
295                         mp->m_flags &= ~XFS_MOUNT_IKEEP;
296                         break;
297                 case Opt_largeio:
298                         mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
299                         break;
300                 case Opt_nolargeio:
301                         mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
302                         break;
303                 case Opt_attr2:
304                         mp->m_flags |= XFS_MOUNT_ATTR2;
305                         break;
306                 case Opt_noattr2:
307                         mp->m_flags &= ~XFS_MOUNT_ATTR2;
308                         mp->m_flags |= XFS_MOUNT_NOATTR2;
309                         break;
310                 case Opt_filestreams:
311                         mp->m_flags |= XFS_MOUNT_FILESTREAMS;
312                         break;
313                 case Opt_noquota:
314                         mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
315                         mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
316                         mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
317                         break;
318                 case Opt_quota:
319                 case Opt_uquota:
320                 case Opt_usrquota:
321                         mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
322                                          XFS_UQUOTA_ENFD);
323                         break;
324                 case Opt_qnoenforce:
325                 case Opt_uqnoenforce:
326                         mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
327                         mp->m_qflags &= ~XFS_UQUOTA_ENFD;
328                         break;
329                 case Opt_pquota:
330                 case Opt_prjquota:
331                         mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
332                                          XFS_PQUOTA_ENFD);
333                         break;
334                 case Opt_pqnoenforce:
335                         mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
336                         mp->m_qflags &= ~XFS_PQUOTA_ENFD;
337                         break;
338                 case Opt_gquota:
339                 case Opt_grpquota:
340                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
341                                          XFS_GQUOTA_ENFD);
342                         break;
343                 case Opt_gqnoenforce:
344                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
345                         mp->m_qflags &= ~XFS_GQUOTA_ENFD;
346                         break;
347                 case Opt_discard:
348                         mp->m_flags |= XFS_MOUNT_DISCARD;
349                         break;
350                 case Opt_nodiscard:
351                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
352                         break;
353 #ifdef CONFIG_FS_DAX
354                 case Opt_dax:
355                         mp->m_flags |= XFS_MOUNT_DAX;
356                         break;
357 #endif
358                 default:
359                         xfs_warn(mp, "unknown mount option [%s].", p);
360                         return -EINVAL;
361                 }
362         }
363
364         /*
365          * no recovery flag requires a read-only mount
366          */
367         if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
368             !(mp->m_flags & XFS_MOUNT_RDONLY)) {
369                 xfs_warn(mp, "no-recovery mounts must be read-only.");
370                 return -EINVAL;
371         }
372
373         if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
374                 xfs_warn(mp,
375         "sunit and swidth options incompatible with the noalign option");
376                 return -EINVAL;
377         }
378
379 #ifndef CONFIG_XFS_QUOTA
380         if (XFS_IS_QUOTA_RUNNING(mp)) {
381                 xfs_warn(mp, "quota support not available in this kernel.");
382                 return -EINVAL;
383         }
384 #endif
385
386         if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
387                 xfs_warn(mp, "sunit and swidth must be specified together");
388                 return -EINVAL;
389         }
390
391         if (dsunit && (dswidth % dsunit != 0)) {
392                 xfs_warn(mp,
393         "stripe width (%d) must be a multiple of the stripe unit (%d)",
394                         dswidth, dsunit);
395                 return -EINVAL;
396         }
397
398 done:
399         if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
400                 /*
401                  * At this point the superblock has not been read
402                  * in, therefore we do not know the block size.
403                  * Before the mount call ends we will convert
404                  * these to FSBs.
405                  */
406                 mp->m_dalign = dsunit;
407                 mp->m_swidth = dswidth;
408         }
409
410         if (mp->m_logbufs != -1 &&
411             mp->m_logbufs != 0 &&
412             (mp->m_logbufs < XLOG_MIN_ICLOGS ||
413              mp->m_logbufs > XLOG_MAX_ICLOGS)) {
414                 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
415                         mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
416                 return -EINVAL;
417         }
418         if (mp->m_logbsize != -1 &&
419             mp->m_logbsize !=  0 &&
420             (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
421              mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
422              !is_power_of_2(mp->m_logbsize))) {
423                 xfs_warn(mp,
424                         "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
425                         mp->m_logbsize);
426                 return -EINVAL;
427         }
428
429         if (iosizelog) {
430                 if (iosizelog > XFS_MAX_IO_LOG ||
431                     iosizelog < XFS_MIN_IO_LOG) {
432                         xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
433                                 iosizelog, XFS_MIN_IO_LOG,
434                                 XFS_MAX_IO_LOG);
435                         return -EINVAL;
436                 }
437
438                 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
439                 mp->m_readio_log = iosizelog;
440                 mp->m_writeio_log = iosizelog;
441         }
442
443         return 0;
444 }
445
446 struct proc_xfs_info {
447         uint64_t        flag;
448         char            *str;
449 };
450
451 STATIC int
452 xfs_showargs(
453         struct xfs_mount        *mp,
454         struct seq_file         *m)
455 {
456         static struct proc_xfs_info xfs_info_set[] = {
457                 /* the few simple ones we can get from the mount struct */
458                 { XFS_MOUNT_IKEEP,              ",ikeep" },
459                 { XFS_MOUNT_WSYNC,              ",wsync" },
460                 { XFS_MOUNT_NOALIGN,            ",noalign" },
461                 { XFS_MOUNT_SWALLOC,            ",swalloc" },
462                 { XFS_MOUNT_NOUUID,             ",nouuid" },
463                 { XFS_MOUNT_NORECOVERY,         ",norecovery" },
464                 { XFS_MOUNT_ATTR2,              ",attr2" },
465                 { XFS_MOUNT_FILESTREAMS,        ",filestreams" },
466                 { XFS_MOUNT_GRPID,              ",grpid" },
467                 { XFS_MOUNT_DISCARD,            ",discard" },
468                 { XFS_MOUNT_SMALL_INUMS,        ",inode32" },
469                 { XFS_MOUNT_DAX,                ",dax" },
470                 { 0, NULL }
471         };
472         static struct proc_xfs_info xfs_info_unset[] = {
473                 /* the few simple ones we can get from the mount struct */
474                 { XFS_MOUNT_COMPAT_IOSIZE,      ",largeio" },
475                 { XFS_MOUNT_SMALL_INUMS,        ",inode64" },
476                 { 0, NULL }
477         };
478         struct proc_xfs_info    *xfs_infop;
479
480         for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
481                 if (mp->m_flags & xfs_infop->flag)
482                         seq_puts(m, xfs_infop->str);
483         }
484         for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
485                 if (!(mp->m_flags & xfs_infop->flag))
486                         seq_puts(m, xfs_infop->str);
487         }
488
489         if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
490                 seq_printf(m, ",allocsize=%dk",
491                                 (int)(1 << mp->m_writeio_log) >> 10);
492
493         if (mp->m_logbufs > 0)
494                 seq_printf(m, ",logbufs=%d", mp->m_logbufs);
495         if (mp->m_logbsize > 0)
496                 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
497
498         if (mp->m_logname)
499                 seq_show_option(m, "logdev", mp->m_logname);
500         if (mp->m_rtname)
501                 seq_show_option(m, "rtdev", mp->m_rtname);
502
503         if (mp->m_dalign > 0)
504                 seq_printf(m, ",sunit=%d",
505                                 (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
506         if (mp->m_swidth > 0)
507                 seq_printf(m, ",swidth=%d",
508                                 (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
509
510         if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
511                 seq_puts(m, ",usrquota");
512         else if (mp->m_qflags & XFS_UQUOTA_ACCT)
513                 seq_puts(m, ",uqnoenforce");
514
515         if (mp->m_qflags & XFS_PQUOTA_ACCT) {
516                 if (mp->m_qflags & XFS_PQUOTA_ENFD)
517                         seq_puts(m, ",prjquota");
518                 else
519                         seq_puts(m, ",pqnoenforce");
520         }
521         if (mp->m_qflags & XFS_GQUOTA_ACCT) {
522                 if (mp->m_qflags & XFS_GQUOTA_ENFD)
523                         seq_puts(m, ",grpquota");
524                 else
525                         seq_puts(m, ",gqnoenforce");
526         }
527
528         if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
529                 seq_puts(m, ",noquota");
530
531         return 0;
532 }
533 static uint64_t
534 xfs_max_file_offset(
535         unsigned int            blockshift)
536 {
537         unsigned int            pagefactor = 1;
538         unsigned int            bitshift = BITS_PER_LONG - 1;
539
540         /* Figure out maximum filesize, on Linux this can depend on
541          * the filesystem blocksize (on 32 bit platforms).
542          * __block_write_begin does this in an [unsigned] long...
543          *      page->index << (PAGE_SHIFT - bbits)
544          * So, for page sized blocks (4K on 32 bit platforms),
545          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
546          *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
547          * but for smaller blocksizes it is less (bbits = log2 bsize).
548          * Note1: get_block_t takes a long (implicit cast from above)
549          * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
550          * can optionally convert the [unsigned] long from above into
551          * an [unsigned] long long.
552          */
553
554 #if BITS_PER_LONG == 32
555 # if defined(CONFIG_LBDAF)
556         ASSERT(sizeof(sector_t) == 8);
557         pagefactor = PAGE_SIZE;
558         bitshift = BITS_PER_LONG;
559 # else
560         pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
561 # endif
562 #endif
563
564         return (((uint64_t)pagefactor) << bitshift) - 1;
565 }
566
567 /*
568  * Set parameters for inode allocation heuristics, taking into account
569  * filesystem size and inode32/inode64 mount options; i.e. specifically
570  * whether or not XFS_MOUNT_SMALL_INUMS is set.
571  *
572  * Inode allocation patterns are altered only if inode32 is requested
573  * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
574  * If altered, XFS_MOUNT_32BITINODES is set as well.
575  *
576  * An agcount independent of that in the mount structure is provided
577  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
578  * to the potentially higher ag count.
579  *
580  * Returns the maximum AG index which may contain inodes.
581  */
582 xfs_agnumber_t
583 xfs_set_inode_alloc(
584         struct xfs_mount *mp,
585         xfs_agnumber_t  agcount)
586 {
587         xfs_agnumber_t  index;
588         xfs_agnumber_t  maxagi = 0;
589         xfs_sb_t        *sbp = &mp->m_sb;
590         xfs_agnumber_t  max_metadata;
591         xfs_agino_t     agino;
592         xfs_ino_t       ino;
593
594         /*
595          * Calculate how much should be reserved for inodes to meet
596          * the max inode percentage.  Used only for inode32.
597          */
598         if (mp->m_maxicount) {
599                 uint64_t        icount;
600
601                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
602                 do_div(icount, 100);
603                 icount += sbp->sb_agblocks - 1;
604                 do_div(icount, sbp->sb_agblocks);
605                 max_metadata = icount;
606         } else {
607                 max_metadata = agcount;
608         }
609
610         /* Get the last possible inode in the filesystem */
611         agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
612         ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
613
614         /*
615          * If user asked for no more than 32-bit inodes, and the fs is
616          * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
617          * the allocator to accommodate the request.
618          */
619         if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
620                 mp->m_flags |= XFS_MOUNT_32BITINODES;
621         else
622                 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
623
624         for (index = 0; index < agcount; index++) {
625                 struct xfs_perag        *pag;
626
627                 ino = XFS_AGINO_TO_INO(mp, index, agino);
628
629                 pag = xfs_perag_get(mp, index);
630
631                 if (mp->m_flags & XFS_MOUNT_32BITINODES) {
632                         if (ino > XFS_MAXINUMBER_32) {
633                                 pag->pagi_inodeok = 0;
634                                 pag->pagf_metadata = 0;
635                         } else {
636                                 pag->pagi_inodeok = 1;
637                                 maxagi++;
638                                 if (index < max_metadata)
639                                         pag->pagf_metadata = 1;
640                                 else
641                                         pag->pagf_metadata = 0;
642                         }
643                 } else {
644                         pag->pagi_inodeok = 1;
645                         pag->pagf_metadata = 0;
646                 }
647
648                 xfs_perag_put(pag);
649         }
650
651         return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
652 }
653
654 STATIC int
655 xfs_blkdev_get(
656         xfs_mount_t             *mp,
657         const char              *name,
658         struct block_device     **bdevp)
659 {
660         int                     error = 0;
661
662         *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
663                                     mp);
664         if (IS_ERR(*bdevp)) {
665                 error = PTR_ERR(*bdevp);
666                 xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
667         }
668
669         return error;
670 }
671
672 STATIC void
673 xfs_blkdev_put(
674         struct block_device     *bdev)
675 {
676         if (bdev)
677                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
678 }
679
680 void
681 xfs_blkdev_issue_flush(
682         xfs_buftarg_t           *buftarg)
683 {
684         blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
685 }
686
687 STATIC void
688 xfs_close_devices(
689         struct xfs_mount        *mp)
690 {
691         struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
692
693         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
694                 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
695                 struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
696
697                 xfs_free_buftarg(mp->m_logdev_targp);
698                 xfs_blkdev_put(logdev);
699                 fs_put_dax(dax_logdev);
700         }
701         if (mp->m_rtdev_targp) {
702                 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
703                 struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
704
705                 xfs_free_buftarg(mp->m_rtdev_targp);
706                 xfs_blkdev_put(rtdev);
707                 fs_put_dax(dax_rtdev);
708         }
709         xfs_free_buftarg(mp->m_ddev_targp);
710         fs_put_dax(dax_ddev);
711 }
712
713 /*
714  * The file system configurations are:
715  *      (1) device (partition) with data and internal log
716  *      (2) logical volume with data and log subvolumes.
717  *      (3) logical volume with data, log, and realtime subvolumes.
718  *
719  * We only have to handle opening the log and realtime volumes here if
720  * they are present.  The data subvolume has already been opened by
721  * get_sb_bdev() and is stored in sb->s_bdev.
722  */
723 STATIC int
724 xfs_open_devices(
725         struct xfs_mount        *mp)
726 {
727         struct block_device     *ddev = mp->m_super->s_bdev;
728         struct dax_device       *dax_ddev = fs_dax_get_by_bdev(ddev);
729         struct dax_device       *dax_logdev = NULL, *dax_rtdev = NULL;
730         struct block_device     *logdev = NULL, *rtdev = NULL;
731         int                     error;
732
733         /*
734          * Open real time and log devices - order is important.
735          */
736         if (mp->m_logname) {
737                 error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
738                 if (error)
739                         goto out;
740                 dax_logdev = fs_dax_get_by_bdev(logdev);
741         }
742
743         if (mp->m_rtname) {
744                 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
745                 if (error)
746                         goto out_close_logdev;
747
748                 if (rtdev == ddev || rtdev == logdev) {
749                         xfs_warn(mp,
750         "Cannot mount filesystem with identical rtdev and ddev/logdev.");
751                         error = -EINVAL;
752                         goto out_close_rtdev;
753                 }
754                 dax_rtdev = fs_dax_get_by_bdev(rtdev);
755         }
756
757         /*
758          * Setup xfs_mount buffer target pointers
759          */
760         error = -ENOMEM;
761         mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
762         if (!mp->m_ddev_targp)
763                 goto out_close_rtdev;
764
765         if (rtdev) {
766                 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
767                 if (!mp->m_rtdev_targp)
768                         goto out_free_ddev_targ;
769         }
770
771         if (logdev && logdev != ddev) {
772                 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
773                 if (!mp->m_logdev_targp)
774                         goto out_free_rtdev_targ;
775         } else {
776                 mp->m_logdev_targp = mp->m_ddev_targp;
777         }
778
779         return 0;
780
781  out_free_rtdev_targ:
782         if (mp->m_rtdev_targp)
783                 xfs_free_buftarg(mp->m_rtdev_targp);
784  out_free_ddev_targ:
785         xfs_free_buftarg(mp->m_ddev_targp);
786  out_close_rtdev:
787         xfs_blkdev_put(rtdev);
788         fs_put_dax(dax_rtdev);
789  out_close_logdev:
790         if (logdev && logdev != ddev) {
791                 xfs_blkdev_put(logdev);
792                 fs_put_dax(dax_logdev);
793         }
794  out:
795         fs_put_dax(dax_ddev);
796         return error;
797 }
798
799 /*
800  * Setup xfs_mount buffer target pointers based on superblock
801  */
802 STATIC int
803 xfs_setup_devices(
804         struct xfs_mount        *mp)
805 {
806         int                     error;
807
808         error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
809         if (error)
810                 return error;
811
812         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
813                 unsigned int    log_sector_size = BBSIZE;
814
815                 if (xfs_sb_version_hassector(&mp->m_sb))
816                         log_sector_size = mp->m_sb.sb_logsectsize;
817                 error = xfs_setsize_buftarg(mp->m_logdev_targp,
818                                             log_sector_size);
819                 if (error)
820                         return error;
821         }
822         if (mp->m_rtdev_targp) {
823                 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
824                                             mp->m_sb.sb_sectsize);
825                 if (error)
826                         return error;
827         }
828
829         return 0;
830 }
831
832 STATIC int
833 xfs_init_mount_workqueues(
834         struct xfs_mount        *mp)
835 {
836         mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
837                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
838         if (!mp->m_buf_workqueue)
839                 goto out;
840
841         mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
842                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
843         if (!mp->m_data_workqueue)
844                 goto out_destroy_buf;
845
846         mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
847                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
848         if (!mp->m_unwritten_workqueue)
849                 goto out_destroy_data_iodone_queue;
850
851         mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
852                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
853         if (!mp->m_cil_workqueue)
854                 goto out_destroy_unwritten;
855
856         mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
857                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
858         if (!mp->m_reclaim_workqueue)
859                 goto out_destroy_cil;
860
861         mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
862                         WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
863                         mp->m_fsname);
864         if (!mp->m_log_workqueue)
865                 goto out_destroy_reclaim;
866
867         mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
868                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
869         if (!mp->m_eofblocks_workqueue)
870                 goto out_destroy_log;
871
872         mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
873                                                mp->m_fsname);
874         if (!mp->m_sync_workqueue)
875                 goto out_destroy_eofb;
876
877         return 0;
878
879 out_destroy_eofb:
880         destroy_workqueue(mp->m_eofblocks_workqueue);
881 out_destroy_log:
882         destroy_workqueue(mp->m_log_workqueue);
883 out_destroy_reclaim:
884         destroy_workqueue(mp->m_reclaim_workqueue);
885 out_destroy_cil:
886         destroy_workqueue(mp->m_cil_workqueue);
887 out_destroy_unwritten:
888         destroy_workqueue(mp->m_unwritten_workqueue);
889 out_destroy_data_iodone_queue:
890         destroy_workqueue(mp->m_data_workqueue);
891 out_destroy_buf:
892         destroy_workqueue(mp->m_buf_workqueue);
893 out:
894         return -ENOMEM;
895 }
896
897 STATIC void
898 xfs_destroy_mount_workqueues(
899         struct xfs_mount        *mp)
900 {
901         destroy_workqueue(mp->m_sync_workqueue);
902         destroy_workqueue(mp->m_eofblocks_workqueue);
903         destroy_workqueue(mp->m_log_workqueue);
904         destroy_workqueue(mp->m_reclaim_workqueue);
905         destroy_workqueue(mp->m_cil_workqueue);
906         destroy_workqueue(mp->m_data_workqueue);
907         destroy_workqueue(mp->m_unwritten_workqueue);
908         destroy_workqueue(mp->m_buf_workqueue);
909 }
910
911 /*
912  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
913  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
914  * for IO to complete so that we effectively throttle multiple callers to the
915  * rate at which IO is completing.
916  */
917 void
918 xfs_flush_inodes(
919         struct xfs_mount        *mp)
920 {
921         struct super_block      *sb = mp->m_super;
922
923         if (down_read_trylock(&sb->s_umount)) {
924                 sync_inodes_sb(sb);
925                 up_read(&sb->s_umount);
926         }
927 }
928
929 /* Catch misguided souls that try to use this interface on XFS */
930 STATIC struct inode *
931 xfs_fs_alloc_inode(
932         struct super_block      *sb)
933 {
934         BUG();
935         return NULL;
936 }
937
#ifdef DEBUG
/*
 * Debug helper: walk every extent in the given fork of @ip and emit a
 * warning for each one whose start block is a "null start block", i.e. a
 * delayed allocation extent.  Does nothing if the fork does not exist or
 * has no extents.
 */
static void
xfs_check_delalloc(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	const char		*forkname;
	bool			more;

	forkname = (whichfork == XFS_DATA_FORK) ? "data" : "cow";

	more = ifp && xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got);
	while (more) {
		if (isnullstartblock(got.br_startblock)) {
			xfs_warn(ip->i_mount,
	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
				ip->i_ino, forkname,
				got.br_startoff, got.br_blockcount);
		}
		more = xfs_iext_next_extent(ifp, &icur, &got);
	}
}
#else
/* Non-debug builds compile the check away entirely. */
#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
#endif
963
964 /*
965  * Now that the generic code is guaranteed not to be accessing
966  * the linux inode, we can inactivate and reclaim the inode.
967  */
968 STATIC void
969 xfs_fs_destroy_inode(
970         struct inode            *inode)
971 {
972         struct xfs_inode        *ip = XFS_I(inode);
973
974         trace_xfs_destroy_inode(ip);
975
976         ASSERT(!rwsem_is_locked(&inode->i_rwsem));
977         XFS_STATS_INC(ip->i_mount, vn_rele);
978         XFS_STATS_INC(ip->i_mount, vn_remove);
979
980         xfs_inactive(ip);
981
982         if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
983                 xfs_check_delalloc(ip, XFS_DATA_FORK);
984                 xfs_check_delalloc(ip, XFS_COW_FORK);
985                 ASSERT(0);
986         }
987
988         XFS_STATS_INC(ip->i_mount, vn_reclaim);
989
990         /*
991          * We should never get here with one of the reclaim flags already set.
992          */
993         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
994         ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
995
996         /*
997          * We always use background reclaim here because even if the
998          * inode is clean, it still may be under IO and hence we have
999          * to take the flush lock. The background reclaim path handles
1000          * this more efficiently than we can here, so simply let background
1001          * reclaim tear down all inodes.
1002          */
1003         xfs_inode_set_reclaim_tag(ip);
1004 }
1005
1006 static void
1007 xfs_fs_dirty_inode(
1008         struct inode                    *inode,
1009         int                             flag)
1010 {
1011         struct xfs_inode                *ip = XFS_I(inode);
1012         struct xfs_mount                *mp = ip->i_mount;
1013         struct xfs_trans                *tp;
1014
1015         if (!(inode->i_sb->s_flags & SB_LAZYTIME))
1016                 return;
1017         if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
1018                 return;
1019
1020         if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
1021                 return;
1022         xfs_ilock(ip, XFS_ILOCK_EXCL);
1023         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1024         xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
1025         xfs_trans_commit(tp);
1026 }
1027
1028 /*
1029  * Slab object creation initialisation for the XFS inode.
1030  * This covers only the idempotent fields in the XFS inode;
1031  * all other fields need to be initialised on allocation
1032  * from the slab. This avoids the need to repeatedly initialise
1033  * fields in the xfs inode that left in the initialise state
1034  * when freeing the inode.
1035  */
1036 STATIC void
1037 xfs_fs_inode_init_once(
1038         void                    *inode)
1039 {
1040         struct xfs_inode        *ip = inode;
1041
1042         memset(ip, 0, sizeof(struct xfs_inode));
1043
1044         /* vfs inode */
1045         inode_init_once(VFS_I(ip));
1046
1047         /* xfs inode */
1048         atomic_set(&ip->i_pincount, 0);
1049         spin_lock_init(&ip->i_flags_lock);
1050
1051         mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1052                      "xfsino", ip->i_ino);
1053         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1054                      "xfsino", ip->i_ino);
1055 }
1056
1057 /*
1058  * We do an unlocked check for XFS_IDONTCACHE here because we are already
1059  * serialised against cache hits here via the inode->i_lock and igrab() in
1060  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
1061  * racing with us, and it avoids needing to grab a spinlock here for every inode
1062  * we drop the final reference on.
1063  */
1064 STATIC int
1065 xfs_fs_drop_inode(
1066         struct inode            *inode)
1067 {
1068         struct xfs_inode        *ip = XFS_I(inode);
1069
1070         /*
1071          * If this unlinked inode is in the middle of recovery, don't
1072          * drop the inode just yet; log recovery will take care of
1073          * that.  See the comment for this inode flag.
1074          */
1075         if (ip->i_flags & XFS_IRECOVERY) {
1076                 ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
1077                 return 0;
1078         }
1079
1080         return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
1081 }
1082
1083 STATIC void
1084 xfs_free_fsname(
1085         struct xfs_mount        *mp)
1086 {
1087         kfree(mp->m_fsname);
1088         kfree(mp->m_rtname);
1089         kfree(mp->m_logname);
1090 }
1091
1092 STATIC int
1093 xfs_fs_sync_fs(
1094         struct super_block      *sb,
1095         int                     wait)
1096 {
1097         struct xfs_mount        *mp = XFS_M(sb);
1098
1099         /*
1100          * Doing anything during the async pass would be counterproductive.
1101          */
1102         if (!wait)
1103                 return 0;
1104
1105         xfs_log_force(mp, XFS_LOG_SYNC);
1106         if (laptop_mode) {
1107                 /*
1108                  * The disk must be active because we're syncing.
1109                  * We schedule log work now (now that the disk is
1110                  * active) instead of later (when it might not be).
1111                  */
1112                 flush_delayed_work(&mp->m_log->l_work);
1113         }
1114
1115         return 0;
1116 }
1117
1118 STATIC int
1119 xfs_fs_statfs(
1120         struct dentry           *dentry,
1121         struct kstatfs          *statp)
1122 {
1123         struct xfs_mount        *mp = XFS_M(dentry->d_sb);
1124         xfs_sb_t                *sbp = &mp->m_sb;
1125         struct xfs_inode        *ip = XFS_I(d_inode(dentry));
1126         uint64_t                fakeinos, id;
1127         uint64_t                icount;
1128         uint64_t                ifree;
1129         uint64_t                fdblocks;
1130         xfs_extlen_t            lsize;
1131         int64_t                 ffree;
1132
1133         statp->f_type = XFS_SUPER_MAGIC;
1134         statp->f_namelen = MAXNAMELEN - 1;
1135
1136         id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
1137         statp->f_fsid.val[0] = (u32)id;
1138         statp->f_fsid.val[1] = (u32)(id >> 32);
1139
1140         icount = percpu_counter_sum(&mp->m_icount);
1141         ifree = percpu_counter_sum(&mp->m_ifree);
1142         fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1143
1144         spin_lock(&mp->m_sb_lock);
1145         statp->f_bsize = sbp->sb_blocksize;
1146         lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1147         statp->f_blocks = sbp->sb_dblocks - lsize;
1148         spin_unlock(&mp->m_sb_lock);
1149
1150         statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
1151         statp->f_bavail = statp->f_bfree;
1152
1153         fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
1154         statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
1155         if (mp->m_maxicount)
1156                 statp->f_files = min_t(typeof(statp->f_files),
1157                                         statp->f_files,
1158                                         mp->m_maxicount);
1159
1160         /* If sb_icount overshot maxicount, report actual allocation */
1161         statp->f_files = max_t(typeof(statp->f_files),
1162                                         statp->f_files,
1163                                         sbp->sb_icount);
1164
1165         /* make sure statp->f_ffree does not underflow */
1166         ffree = statp->f_files - (icount - ifree);
1167         statp->f_ffree = max_t(int64_t, ffree, 0);
1168
1169
1170         if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1171             ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
1172                               (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
1173                 xfs_qm_statvfs(ip, statp);
1174
1175         if (XFS_IS_REALTIME_MOUNT(mp) &&
1176             (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
1177                 statp->f_blocks = sbp->sb_rblocks;
1178                 statp->f_bavail = statp->f_bfree =
1179                         sbp->sb_frextents * sbp->sb_rextsize;
1180         }
1181
1182         return 0;
1183 }
1184
1185 STATIC void
1186 xfs_save_resvblks(struct xfs_mount *mp)
1187 {
1188         uint64_t resblks = 0;
1189
1190         mp->m_resblks_save = mp->m_resblks;
1191         xfs_reserve_blocks(mp, &resblks, NULL);
1192 }
1193
1194 STATIC void
1195 xfs_restore_resvblks(struct xfs_mount *mp)
1196 {
1197         uint64_t resblks;
1198
1199         if (mp->m_resblks_save) {
1200                 resblks = mp->m_resblks_save;
1201                 mp->m_resblks_save = 0;
1202         } else
1203                 resblks = xfs_default_resblks(mp);
1204
1205         xfs_reserve_blocks(mp, &resblks, NULL);
1206 }
1207
1208 /*
1209  * Trigger writeback of all the dirty metadata in the file system.
1210  *
1211  * This ensures that the metadata is written to their location on disk rather
1212  * than just existing in transactions in the log. This means after a quiesce
1213  * there is no log replay required to write the inodes to disk - this is the
1214  * primary difference between a sync and a quiesce.
1215  *
1216  * Note: xfs_log_quiesce() stops background log work - the callers must ensure
1217  * it is started again when appropriate.
1218  */
1219 void
1220 xfs_quiesce_attr(
1221         struct xfs_mount        *mp)
1222 {
1223         int     error = 0;
1224
1225         /* wait for all modifications to complete */
1226         while (atomic_read(&mp->m_active_trans) > 0)
1227                 delay(100);
1228
1229         /* force the log to unpin objects from the now complete transactions */
1230         xfs_log_force(mp, XFS_LOG_SYNC);
1231
1232         /* reclaim inodes to do any IO before the freeze completes */
1233         xfs_reclaim_inodes(mp, 0);
1234         xfs_reclaim_inodes(mp, SYNC_WAIT);
1235
1236         /* Push the superblock and write an unmount record */
1237         error = xfs_log_sbcount(mp);
1238         if (error)
1239                 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
1240                                 "Frozen image may not be consistent.");
1241         /*
1242          * Just warn here till VFS can correctly support
1243          * read-only remount without racing.
1244          */
1245         WARN_ON(atomic_read(&mp->m_active_trans) != 0);
1246
1247         xfs_log_quiesce(mp);
1248 }
1249
1250 STATIC int
1251 xfs_test_remount_options(
1252         struct super_block      *sb,
1253         char                    *options)
1254 {
1255         int                     error = 0;
1256         struct xfs_mount        *tmp_mp;
1257
1258         tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
1259         if (!tmp_mp)
1260                 return -ENOMEM;
1261
1262         tmp_mp->m_super = sb;
1263         error = xfs_parseargs(tmp_mp, options);
1264         xfs_free_fsname(tmp_mp);
1265         kmem_free(tmp_mp);
1266
1267         return error;
1268 }
1269
1270 STATIC int
1271 xfs_fs_remount(
1272         struct super_block      *sb,
1273         int                     *flags,
1274         char                    *options)
1275 {
1276         struct xfs_mount        *mp = XFS_M(sb);
1277         xfs_sb_t                *sbp = &mp->m_sb;
1278         substring_t             args[MAX_OPT_ARGS];
1279         char                    *p;
1280         int                     error;
1281
1282         /* First, check for complete junk; i.e. invalid options */
1283         error = xfs_test_remount_options(sb, options);
1284         if (error)
1285                 return error;
1286
1287         sync_filesystem(sb);
1288         while ((p = strsep(&options, ",")) != NULL) {
1289                 int token;
1290
1291                 if (!*p)
1292                         continue;
1293
1294                 token = match_token(p, tokens, args);
1295                 switch (token) {
1296                 case Opt_inode64:
1297                         mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
1298                         mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1299                         break;
1300                 case Opt_inode32:
1301                         mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
1302                         mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
1303                         break;
1304                 default:
1305                         /*
1306                          * Logically we would return an error here to prevent
1307                          * users from believing they might have changed
1308                          * mount options using remount which can't be changed.
1309                          *
1310                          * But unfortunately mount(8) adds all options from
1311                          * mtab and fstab to the mount arguments in some cases
1312                          * so we can't blindly reject options, but have to
1313                          * check for each specified option if it actually
1314                          * differs from the currently set option and only
1315                          * reject it if that's the case.
1316                          *
1317                          * Until that is implemented we return success for
1318                          * every remount request, and silently ignore all
1319                          * options that we can't actually change.
1320                          */
1321 #if 0
1322                         xfs_info(mp,
1323                 "mount option \"%s\" not supported for remount", p);
1324                         return -EINVAL;
1325 #else
1326                         break;
1327 #endif
1328                 }
1329         }
1330
1331         /* ro -> rw */
1332         if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
1333                 if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
1334                         xfs_warn(mp,
1335                 "ro->rw transition prohibited on norecovery mount");
1336                         return -EINVAL;
1337                 }
1338
1339                 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
1340                     xfs_sb_has_ro_compat_feature(sbp,
1341                                         XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1342                         xfs_warn(mp,
1343 "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1344                                 (sbp->sb_features_ro_compat &
1345                                         XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1346                         return -EINVAL;
1347                 }
1348
1349                 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1350
1351                 /*
1352                  * If this is the first remount to writeable state we
1353                  * might have some superblock changes to update.
1354                  */
1355                 if (mp->m_update_sb) {
1356                         error = xfs_sync_sb(mp, false);
1357                         if (error) {
1358                                 xfs_warn(mp, "failed to write sb changes");
1359                                 return error;
1360                         }
1361                         mp->m_update_sb = false;
1362                 }
1363
1364                 /*
1365                  * Fill out the reserve pool if it is empty. Use the stashed
1366                  * value if it is non-zero, otherwise go with the default.
1367                  */
1368                 xfs_restore_resvblks(mp);
1369                 xfs_log_work_queue(mp);
1370
1371                 /* Recover any CoW blocks that never got remapped. */
1372                 error = xfs_reflink_recover_cow(mp);
1373                 if (error) {
1374                         xfs_err(mp,
1375         "Error %d recovering leftover CoW allocations.", error);
1376                         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1377                         return error;
1378                 }
1379                 xfs_icache_enable_reclaim(mp);
1380
1381                 /* Create the per-AG metadata reservation pool .*/
1382                 error = xfs_fs_reserve_ag_blocks(mp);
1383                 if (error && error != -ENOSPC)
1384                         return error;
1385         }
1386
1387         /* rw -> ro */
1388         if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
1389                 /*
1390                  * Cancel background eofb scanning so it cannot race with the
1391                  * final log force+buftarg wait and deadlock the remount.
1392                  */
1393                 xfs_icache_disable_reclaim(mp);
1394
1395                 /* Get rid of any leftover CoW reservations... */
1396                 error = xfs_icache_free_cowblocks(mp, NULL);
1397                 if (error) {
1398                         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1399                         return error;
1400                 }
1401
1402                 /* Free the per-AG metadata reservation pool. */
1403                 error = xfs_fs_unreserve_ag_blocks(mp);
1404                 if (error) {
1405                         xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1406                         return error;
1407                 }
1408
1409                 /*
1410                  * Before we sync the metadata, we need to free up the reserve
1411                  * block pool so that the used block count in the superblock on
1412                  * disk is correct at the end of the remount. Stash the current
1413                  * reserve pool size so that if we get remounted rw, we can
1414                  * return it to the same size.
1415                  */
1416                 xfs_save_resvblks(mp);
1417
1418                 xfs_quiesce_attr(mp);
1419                 mp->m_flags |= XFS_MOUNT_RDONLY;
1420         }
1421
1422         return 0;
1423 }
1424
1425 /*
1426  * Second stage of a freeze. The data is already frozen so we only
1427  * need to take care of the metadata. Once that's done sync the superblock
1428  * to the log to dirty it in case of a crash while frozen. This ensures that we
1429  * will recover the unlinked inode lists on the next mount.
1430  */
1431 STATIC int
1432 xfs_fs_freeze(
1433         struct super_block      *sb)
1434 {
1435         struct xfs_mount        *mp = XFS_M(sb);
1436
1437         xfs_icache_disable_reclaim(mp);
1438         xfs_save_resvblks(mp);
1439         xfs_quiesce_attr(mp);
1440         return xfs_sync_sb(mp, true);
1441 }
1442
1443 STATIC int
1444 xfs_fs_unfreeze(
1445         struct super_block      *sb)
1446 {
1447         struct xfs_mount        *mp = XFS_M(sb);
1448
1449         xfs_restore_resvblks(mp);
1450         xfs_log_work_queue(mp);
1451         xfs_icache_enable_reclaim(mp);
1452         return 0;
1453 }
1454
1455 STATIC int
1456 xfs_fs_show_options(
1457         struct seq_file         *m,
1458         struct dentry           *root)
1459 {
1460         return xfs_showargs(XFS_M(root->d_sb), m);
1461 }
1462
1463 /*
1464  * This function fills in xfs_mount_t fields based on mount args.
1465  * Note: the superblock _has_ now been read in.
1466  */
1467 STATIC int
1468 xfs_finish_flags(
1469         struct xfs_mount        *mp)
1470 {
1471         int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1472
1473         /* Fail a mount where the logbuf is smaller than the log stripe */
1474         if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1475                 if (mp->m_logbsize <= 0 &&
1476                     mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1477                         mp->m_logbsize = mp->m_sb.sb_logsunit;
1478                 } else if (mp->m_logbsize > 0 &&
1479                            mp->m_logbsize < mp->m_sb.sb_logsunit) {
1480                         xfs_warn(mp,
1481                 "logbuf size must be greater than or equal to log stripe size");
1482                         return -EINVAL;
1483                 }
1484         } else {
1485                 /* Fail a mount if the logbuf is larger than 32K */
1486                 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1487                         xfs_warn(mp,
1488                 "logbuf size for version 1 logs must be 16K or 32K");
1489                         return -EINVAL;
1490                 }
1491         }
1492
1493         /*
1494          * V5 filesystems always use attr2 format for attributes.
1495          */
1496         if (xfs_sb_version_hascrc(&mp->m_sb) &&
1497             (mp->m_flags & XFS_MOUNT_NOATTR2)) {
1498                 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1499                              "attr2 is always enabled for V5 filesystems.");
1500                 return -EINVAL;
1501         }
1502
1503         /*
1504          * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1505          * told by noattr2 to turn it off
1506          */
1507         if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1508             !(mp->m_flags & XFS_MOUNT_NOATTR2))
1509                 mp->m_flags |= XFS_MOUNT_ATTR2;
1510
1511         /*
1512          * prohibit r/w mounts of read-only filesystems
1513          */
1514         if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1515                 xfs_warn(mp,
1516                         "cannot mount a read-only filesystem as read-write");
1517                 return -EROFS;
1518         }
1519
1520         if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
1521             (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
1522             !xfs_sb_version_has_pquotino(&mp->m_sb)) {
1523                 xfs_warn(mp,
1524                   "Super block does not support project and group quota together");
1525                 return -EINVAL;
1526         }
1527
1528         return 0;
1529 }
1530
1531 static int
1532 xfs_init_percpu_counters(
1533         struct xfs_mount        *mp)
1534 {
1535         int             error;
1536
1537         error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1538         if (error)
1539                 return -ENOMEM;
1540
1541         error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1542         if (error)
1543                 goto free_icount;
1544
1545         error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1546         if (error)
1547                 goto free_ifree;
1548
1549         return 0;
1550
1551 free_ifree:
1552         percpu_counter_destroy(&mp->m_ifree);
1553 free_icount:
1554         percpu_counter_destroy(&mp->m_icount);
1555         return -ENOMEM;
1556 }
1557
1558 void
1559 xfs_reinit_percpu_counters(
1560         struct xfs_mount        *mp)
1561 {
1562         percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1563         percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1564         percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1565 }
1566
1567 static void
1568 xfs_destroy_percpu_counters(
1569         struct xfs_mount        *mp)
1570 {
1571         percpu_counter_destroy(&mp->m_icount);
1572         percpu_counter_destroy(&mp->m_ifree);
1573         percpu_counter_destroy(&mp->m_fdblocks);
1574 }
1575
1576 static struct xfs_mount *
1577 xfs_mount_alloc(
1578         struct super_block      *sb)
1579 {
1580         struct xfs_mount        *mp;
1581
1582         mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1583         if (!mp)
1584                 return NULL;
1585
1586         mp->m_super = sb;
1587         spin_lock_init(&mp->m_sb_lock);
1588         spin_lock_init(&mp->m_agirotor_lock);
1589         INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1590         spin_lock_init(&mp->m_perag_lock);
1591         mutex_init(&mp->m_growlock);
1592         atomic_set(&mp->m_active_trans, 0);
1593         INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1594         INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1595         INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
1596         mp->m_kobj.kobject.kset = xfs_kset;
1597         /*
1598          * We don't create the finobt per-ag space reservation until after log
1599          * recovery, so we must set this to true so that an ifree transaction
1600          * started during log recovery will not depend on space reservations
1601          * for finobt expansion.
1602          */
1603         mp->m_finobt_nores = true;
1604         return mp;
1605 }
1606
1607
/*
 * Set up a VFS super_block for an XFS mount.  Passed to mount_bdev() as
 * the fill_super callback.
 *
 * @sb:     superblock being filled in; sb->s_fs_info points at the new
 *          xfs_mount while the mount is alive
 * @data:   mount option data, handed to xfs_parseargs() as a char *
 * @silent: when non-zero, XFS_MFSI_QUIET is passed to xfs_readsb()
 *
 * Returns 0 on success, or a negative errno.  On failure, everything that
 * was set up so far is released through the label chain at the end of the
 * function; once the mount structure has been allocated, that chain also
 * resets sb->s_fs_info to NULL and frees the mount structure.
 */
1608 STATIC int
1609 xfs_fs_fill_super(
1610         struct super_block      *sb,
1611         void                    *data,
1612         int                     silent)
1613 {
1614         struct inode            *root;
1615         struct xfs_mount        *mp = NULL;
1616         int                     flags = 0, error = -ENOMEM;
1617
1618         /*
1619          * allocate mp and do all low-level struct initializations before we
1620          * attach it to the super
1621          */
1622         mp = xfs_mount_alloc(sb);
1623         if (!mp)
1624                 goto out;
1625         sb->s_fs_info = mp;
1626
1627         error = xfs_parseargs(mp, (char *)data);
1628         if (error)
1629                 goto out_free_fsname;
1630
1631         sb_min_blocksize(sb, BBSIZE);
1632         sb->s_xattr = xfs_xattr_handlers;
1633         sb->s_export_op = &xfs_export_operations;
1634 #ifdef CONFIG_XFS_QUOTA
1635         sb->s_qcop = &xfs_quotactl_operations;
1636         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1637 #endif
1638         sb->s_op = &xfs_super_operations;
1639
1640         /*
1641          * Delay mount work if the debug hook is set. This is debug
1642          * instrumentation to coordinate simulation of xfs mount failures
1643          * with VFS superblock operations
1644          */
1645         if (xfs_globals.mount_delay) {
1646                 xfs_notice(mp, "Delaying mount for %d seconds.",
1647                         xfs_globals.mount_delay);
1648                 msleep(xfs_globals.mount_delay * 1000);
1649         }
1650
1651         if (silent)
1652                 flags |= XFS_MFSI_QUIET;
1653
             /*
              * From here on, each step that succeeds has a matching unwind
              * label at the end of the function.
              */
1654         error = xfs_open_devices(mp);
1655         if (error)
1656                 goto out_free_fsname;
1657
1658         error = xfs_init_mount_workqueues(mp);
1659         if (error)
1660                 goto out_close_devices;
1661
1662         error = xfs_init_percpu_counters(mp);
1663         if (error)
1664                 goto out_destroy_workqueues;
1665
1666         /* Allocate stats memory before we do operations that might use it */
1667         mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1668         if (!mp->m_stats.xs_stats) {
1669                 error = -ENOMEM;
1670                 goto out_destroy_counters;
1671         }
1672
1673         error = xfs_readsb(mp, flags);
1674         if (error)
1675                 goto out_free_stats;
1676
1677         error = xfs_finish_flags(mp);
1678         if (error)
1679                 goto out_free_sb;
1680
1681         error = xfs_setup_devices(mp);
1682         if (error)
1683                 goto out_free_sb;
1684
1685         error = xfs_filestream_mount(mp);
1686         if (error)
1687                 goto out_free_sb;
1688
1689         /*
1690          * we must configure the block size in the superblock before we run the
1691          * full mount process as the mount process can lookup and cache inodes.
1692          */
1693         sb->s_magic = XFS_SUPER_MAGIC;
1694         sb->s_blocksize = mp->m_sb.sb_blocksize;
1695         sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1696         sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1697         sb->s_max_links = XFS_MAXLINK;
1698         sb->s_time_gran = 1;
1699         set_posix_acl_flag(sb);
1700
1701         /* version 5 superblocks support inode version counters. */
1702         if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1703                 sb->s_flags |= SB_I_VERSION;
1704
1705         if (mp->m_flags & XFS_MOUNT_DAX) {
1706                 bool rtdev_is_dax = false, datadev_is_dax;
1707
1708                 xfs_warn(mp,
1709                 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1710
1711                 datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
1712                         sb->s_blocksize);
1713                 if (mp->m_rtdev_targp)
1714                         rtdev_is_dax = bdev_dax_supported(
1715                                 mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
                     /* DAX is only dropped when neither device supports it. */
1716                 if (!rtdev_is_dax && !datadev_is_dax) {
1717                         xfs_alert(mp,
1718                         "DAX unsupported by block device. Turning off DAX.");
1719                         mp->m_flags &= ~XFS_MOUNT_DAX;
1720                 }
                     /*
                      * NOTE(review): this check still runs when DAX was just
                      * switched off above, so a reflink filesystem mounted
                      * with the DAX flag set always fails here -- confirm
                      * that this is intended.
                      */
1721                 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1722                         xfs_alert(mp,
1723                 "DAX and reflink cannot be used together!");
1724                         error = -EINVAL;
1725                         goto out_filestream_unmount;
1726                 }
1727         }
1728
             /* Warn and clear the discard flag if the queue cannot discard. */
1729         if (mp->m_flags & XFS_MOUNT_DISCARD) {
1730                 struct request_queue *q = bdev_get_queue(sb->s_bdev);
1731
1732                 if (!blk_queue_discard(q)) {
1733                         xfs_warn(mp, "mounting with \"discard\" option, but "
1734                                         "the device does not support discard");
1735                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
1736                 }
1737         }
1738
1739         if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1740                 if (mp->m_sb.sb_rblocks) {
1741                         xfs_alert(mp,
1742         "reflink not compatible with realtime device!");
1743                         error = -EINVAL;
1744                         goto out_filestream_unmount;
1745                 }
1746
1747                 if (xfs_globals.always_cow) {
1748                         xfs_info(mp, "using DEBUG-only always_cow mode.");
1749                         mp->m_always_cow = true;
1750                 }
1751         }
1752
1753         if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
1754                 xfs_alert(mp,
1755         "reverse mapping btree not compatible with realtime device!");
1756                 error = -EINVAL;
1757                 goto out_filestream_unmount;
1758         }
1759
1760         error = xfs_mountfs(mp);
1761         if (error)
1762                 goto out_filestream_unmount;
1763
             /* Take a reference on the root inode and build s_root from it. */
1764         root = igrab(VFS_I(mp->m_rootip));
1765         if (!root) {
1766                 error = -ENOENT;
1767                 goto out_unmount;
1768         }
1769         sb->s_root = d_make_root(root);
1770         if (!sb->s_root) {
1771                 error = -ENOMEM;
1772                 goto out_unmount;
1773         }
1774
1775         return 0;
1776
             /*
              * Error unwinding: each label undoes one setup step and falls
              * through to the labels for the steps that preceded it.
              */
1777  out_filestream_unmount:
1778         xfs_filestream_unmount(mp);
1779  out_free_sb:
1780         xfs_freesb(mp);
1781  out_free_stats:
1782         free_percpu(mp->m_stats.xs_stats);
1783  out_destroy_counters:
1784         xfs_destroy_percpu_counters(mp);
1785  out_destroy_workqueues:
1786         xfs_destroy_mount_workqueues(mp);
1787  out_close_devices:
1788         xfs_close_devices(mp);
1789  out_free_fsname:
1790         sb->s_fs_info = NULL;
1791         xfs_free_fsname(mp);
1792         kfree(mp);
1793  out:
1794         return error;
1795
             /*
              * Failures after xfs_mountfs() succeeded: undo the filestream
              * setup and the mount here, then rejoin the common chain at
              * out_free_sb so the filestream unmount is not repeated.
              */
1796  out_unmount:
1797         xfs_filestream_unmount(mp);
1798         xfs_unmountfs(mp);
1799         goto out_free_sb;
1800 }
1801
1802 STATIC void
1803 xfs_fs_put_super(
1804         struct super_block      *sb)
1805 {
1806         struct xfs_mount        *mp = XFS_M(sb);
1807
1808         /* if ->fill_super failed, we have no mount to tear down */
1809         if (!sb->s_fs_info)
1810                 return;
1811
1812         xfs_notice(mp, "Unmounting Filesystem");
1813         xfs_filestream_unmount(mp);
1814         xfs_unmountfs(mp);
1815
1816         xfs_freesb(mp);
1817         free_percpu(mp->m_stats.xs_stats);
1818         xfs_destroy_percpu_counters(mp);
1819         xfs_destroy_mount_workqueues(mp);
1820         xfs_close_devices(mp);
1821
1822         sb->s_fs_info = NULL;
1823         xfs_free_fsname(mp);
1824         kfree(mp);
1825 }
1826
1827 STATIC struct dentry *
1828 xfs_fs_mount(
1829         struct file_system_type *fs_type,
1830         int                     flags,
1831         const char              *dev_name,
1832         void                    *data)
1833 {
1834         return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1835 }
1836
1837 static long
1838 xfs_fs_nr_cached_objects(
1839         struct super_block      *sb,
1840         struct shrink_control   *sc)
1841 {
1842         /* Paranoia: catch incorrect calls during mount setup or teardown */
1843         if (WARN_ON_ONCE(!sb->s_fs_info))
1844                 return 0;
1845         return xfs_reclaim_inodes_count(XFS_M(sb));
1846 }
1847
1848 static long
1849 xfs_fs_free_cached_objects(
1850         struct super_block      *sb,
1851         struct shrink_control   *sc)
1852 {
1853         return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1854 }
1855
1856 static const struct super_operations xfs_super_operations = {
1857         .alloc_inode            = xfs_fs_alloc_inode,
1858         .destroy_inode          = xfs_fs_destroy_inode,
1859         .dirty_inode            = xfs_fs_dirty_inode,
1860         .drop_inode             = xfs_fs_drop_inode,
1861         .put_super              = xfs_fs_put_super,
1862         .sync_fs                = xfs_fs_sync_fs,
1863         .freeze_fs              = xfs_fs_freeze,
1864         .unfreeze_fs            = xfs_fs_unfreeze,
1865         .statfs                 = xfs_fs_statfs,
1866         .remount_fs             = xfs_fs_remount,
1867         .show_options           = xfs_fs_show_options,
1868         .nr_cached_objects      = xfs_fs_nr_cached_objects,
1869         .free_cached_objects    = xfs_fs_free_cached_objects,
1870 };
1871
1872 static struct file_system_type xfs_fs_type = {
1873         .owner                  = THIS_MODULE,
1874         .name                   = "xfs",
1875         .mount                  = xfs_fs_mount,
1876         .kill_sb                = kill_block_super,
1877         .fs_flags               = FS_REQUIRES_DEV,
1878 };
1879 MODULE_ALIAS_FS("xfs");
1880
/*
 * Initialise the ioend bioset and create the global XFS memory zones.
 *
 * Returns 0 on success.  If any step fails, everything created before it is
 * destroyed through the label chain at the end of the function and -ENOMEM
 * is returned.  This includes a bioset_init() failure, whose own return
 * value is not propagated.
 */
1881 STATIC int __init
1882 xfs_init_zones(void)
1883 {
1884         if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
1885                         offsetof(struct xfs_ioend, io_inline_bio),
1886                         BIOSET_NEED_BVECS))
1887                 goto out;
1888
1889         xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
1890                                                 "xfs_log_ticket");
1891         if (!xfs_log_ticket_zone)
1892                 goto out_free_ioend_bioset;
1893
1894         xfs_bmap_free_item_zone = kmem_zone_init(
1895                         sizeof(struct xfs_extent_free_item),
1896                         "xfs_bmap_free_item");
1897         if (!xfs_bmap_free_item_zone)
1898                 goto out_destroy_log_ticket_zone;
1899
1900         xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1901                                                 "xfs_btree_cur");
1902         if (!xfs_btree_cur_zone)
1903                 goto out_destroy_bmap_free_item_zone;
1904
1905         xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
1906                                                 "xfs_da_state");
1907         if (!xfs_da_state_zone)
1908                 goto out_destroy_btree_cur_zone;
1909
1910         xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
1911         if (!xfs_ifork_zone)
1912                 goto out_destroy_da_state_zone;
1913
1914         xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
1915         if (!xfs_trans_zone)
1916                 goto out_destroy_ifork_zone;
1917
1918
1919         /*
1920          * The size of the zone allocated buf log item is the maximum
1921          * size possible under XFS.  This wastes a little bit of memory,
1922          * but it is much faster.
1923          */
1924         xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
1925                                            "xfs_buf_item");
1926         if (!xfs_buf_item_zone)
1927                 goto out_destroy_trans_zone;
1928
             /*
              * EFD/EFI zone objects are sized as the log item plus
              * (XFS_EF{D,I}_MAX_FAST_EXTENTS - 1) additional xfs_extent_t.
              */
1929         xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1930                         ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
1931                                  sizeof(xfs_extent_t))), "xfs_efd_item");
1932         if (!xfs_efd_zone)
1933                 goto out_destroy_buf_item_zone;
1934
1935         xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
1936                         ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
1937                                 sizeof(xfs_extent_t))), "xfs_efi_item");
1938         if (!xfs_efi_zone)
1939                 goto out_destroy_efd_zone;
1940
             /* The inode zone is the only one given an init-once constructor. */
1941         xfs_inode_zone =
1942                 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
1943                         KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
1944                         KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
1945         if (!xfs_inode_zone)
1946                 goto out_destroy_efi_zone;
1947
1948         xfs_ili_zone =
1949                 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
1950                                         KM_ZONE_SPREAD, NULL);
1951         if (!xfs_ili_zone)
1952                 goto out_destroy_inode_zone;
1953         xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
1954                                         "xfs_icr");
1955         if (!xfs_icreate_zone)
1956                 goto out_destroy_ili_zone;
1957
1958         xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
1959                         "xfs_rud_item");
1960         if (!xfs_rud_zone)
1961                 goto out_destroy_icreate_zone;
1962
1963         xfs_rui_zone = kmem_zone_init(
1964                         xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
1965                         "xfs_rui_item");
1966         if (!xfs_rui_zone)
1967                 goto out_destroy_rud_zone;
1968
1969         xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
1970                         "xfs_cud_item");
1971         if (!xfs_cud_zone)
1972                 goto out_destroy_rui_zone;
1973
1974         xfs_cui_zone = kmem_zone_init(
1975                         xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
1976                         "xfs_cui_item");
1977         if (!xfs_cui_zone)
1978                 goto out_destroy_cud_zone;
1979
1980         xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
1981                         "xfs_bud_item");
1982         if (!xfs_bud_zone)
1983                 goto out_destroy_cui_zone;
1984
1985         xfs_bui_zone = kmem_zone_init(
1986                         xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
1987                         "xfs_bui_item");
1988         if (!xfs_bui_zone)
1989                 goto out_destroy_bud_zone;
1990
1991         return 0;
1992
             /*
              * Unwind in reverse creation order.  Each label destroys one
              * zone and falls through to the labels for the older ones.
              */
1993  out_destroy_bud_zone:
1994         kmem_zone_destroy(xfs_bud_zone);
1995  out_destroy_cui_zone:
1996         kmem_zone_destroy(xfs_cui_zone);
1997  out_destroy_cud_zone:
1998         kmem_zone_destroy(xfs_cud_zone);
1999  out_destroy_rui_zone:
2000         kmem_zone_destroy(xfs_rui_zone);
2001  out_destroy_rud_zone:
2002         kmem_zone_destroy(xfs_rud_zone);
2003  out_destroy_icreate_zone:
2004         kmem_zone_destroy(xfs_icreate_zone);
2005  out_destroy_ili_zone:
2006         kmem_zone_destroy(xfs_ili_zone);
2007  out_destroy_inode_zone:
2008         kmem_zone_destroy(xfs_inode_zone);
2009  out_destroy_efi_zone:
2010         kmem_zone_destroy(xfs_efi_zone);
2011  out_destroy_efd_zone:
2012         kmem_zone_destroy(xfs_efd_zone);
2013  out_destroy_buf_item_zone:
2014         kmem_zone_destroy(xfs_buf_item_zone);
2015  out_destroy_trans_zone:
2016         kmem_zone_destroy(xfs_trans_zone);
2017  out_destroy_ifork_zone:
2018         kmem_zone_destroy(xfs_ifork_zone);
2019  out_destroy_da_state_zone:
2020         kmem_zone_destroy(xfs_da_state_zone);
2021  out_destroy_btree_cur_zone:
2022         kmem_zone_destroy(xfs_btree_cur_zone);
2023  out_destroy_bmap_free_item_zone:
2024         kmem_zone_destroy(xfs_bmap_free_item_zone);
2025  out_destroy_log_ticket_zone:
2026         kmem_zone_destroy(xfs_log_ticket_zone);
2027  out_free_ioend_bioset:
2028         bioset_exit(&xfs_ioend_bioset);
2029  out:
2030         return -ENOMEM;
2031 }
2032
2033 STATIC void
2034 xfs_destroy_zones(void)
2035 {
2036         /*
2037          * Make sure all delayed rcu free are flushed before we
2038          * destroy caches.
2039          */
2040         rcu_barrier();
2041         kmem_zone_destroy(xfs_bui_zone);
2042         kmem_zone_destroy(xfs_bud_zone);
2043         kmem_zone_destroy(xfs_cui_zone);
2044         kmem_zone_destroy(xfs_cud_zone);
2045         kmem_zone_destroy(xfs_rui_zone);
2046         kmem_zone_destroy(xfs_rud_zone);
2047         kmem_zone_destroy(xfs_icreate_zone);
2048         kmem_zone_destroy(xfs_ili_zone);
2049         kmem_zone_destroy(xfs_inode_zone);
2050         kmem_zone_destroy(xfs_efi_zone);
2051         kmem_zone_destroy(xfs_efd_zone);
2052         kmem_zone_destroy(xfs_buf_item_zone);
2053         kmem_zone_destroy(xfs_trans_zone);
2054         kmem_zone_destroy(xfs_ifork_zone);
2055         kmem_zone_destroy(xfs_da_state_zone);
2056         kmem_zone_destroy(xfs_btree_cur_zone);
2057         kmem_zone_destroy(xfs_bmap_free_item_zone);
2058         kmem_zone_destroy(xfs_log_ticket_zone);
2059         bioset_exit(&xfs_ioend_bioset);
2060 }
2061
2062 STATIC int __init
2063 xfs_init_workqueues(void)
2064 {
2065         /*
2066          * The allocation workqueue can be used in memory reclaim situations
2067          * (writepage path), and parallelism is only limited by the number of
2068          * AGs in all the filesystems mounted. Hence use the default large
2069          * max_active value for this workqueue.
2070          */
2071         xfs_alloc_wq = alloc_workqueue("xfsalloc",
2072                         WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2073         if (!xfs_alloc_wq)
2074                 return -ENOMEM;
2075
2076         xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2077         if (!xfs_discard_wq)
2078                 goto out_free_alloc_wq;
2079
2080         return 0;
2081 out_free_alloc_wq:
2082         destroy_workqueue(xfs_alloc_wq);
2083         return -ENOMEM;
2084 }
2085
2086 STATIC void
2087 xfs_destroy_workqueues(void)
2088 {
2089         destroy_workqueue(xfs_discard_wq);
2090         destroy_workqueue(xfs_alloc_wq);
2091 }
2092
/*
 * Module initialisation.  After the on-disk structure checks, the version
 * banner and xfs_dir_startup(), the global pieces are brought up in this
 * order: xfs_init_zones(), xfs_init_workqueues(), xfs_mru_cache_init(),
 * xfs_buf_init(), xfs_init_procfs(), xfs_sysctl_register(), the "xfs" sysfs
 * kset with its "stats" object (plus "debug" when DEBUG is defined),
 * xfs_qm_init(), and finally register_filesystem().
 *
 * Returns 0 on success.  On failure the steps already completed are undone
 * in reverse order through the labels at the end, and the error is
 * returned.
 */
2093 STATIC int __init
2094 init_xfs_fs(void)
2095 {
2096         int                     error;
2097
2098         xfs_check_ondisk_structs();
2099
2100         printk(KERN_INFO XFS_VERSION_STRING " with "
2101                          XFS_BUILD_OPTIONS " enabled\n");
2102
2103         xfs_dir_startup();
2104
2105         error = xfs_init_zones();
2106         if (error)
2107                 goto out;
2108
2109         error = xfs_init_workqueues();
2110         if (error)
2111                 goto out_destroy_zones;
2112
2113         error = xfs_mru_cache_init();
2114         if (error)
2115                 goto out_destroy_wq;
2116
2117         error = xfs_buf_init();
2118         if (error)
2119                 goto out_mru_cache_uninit;
2120
2121         error = xfs_init_procfs();
2122         if (error)
2123                 goto out_buf_terminate;
2124
2125         error = xfs_sysctl_register();
2126         if (error)
2127                 goto out_cleanup_procfs;
2128
             /* Top-level "xfs" sysfs directory, created under fs_kobj. */
2129         xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2130         if (!xfs_kset) {
2131                 error = -ENOMEM;
2132                 goto out_sysctl_unregister;
2133         }
2134
2135         xfsstats.xs_kobj.kobject.kset = xfs_kset;
2136
2137         xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2138         if (!xfsstats.xs_stats) {
2139                 error = -ENOMEM;
2140                 goto out_kset_unregister;
2141         }
2142
2143         error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2144                                "stats");
2145         if (error)
2146                 goto out_free_stats;
2147
2148 #ifdef DEBUG
2149         xfs_dbg_kobj.kobject.kset = xfs_kset;
2150         error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2151         if (error)
2152                 goto out_remove_stats_kobj;
2153 #endif
2154
2155         error = xfs_qm_init();
2156         if (error)
2157                 goto out_remove_dbg_kobj;
2158
2159         error = register_filesystem(&xfs_fs_type);
2160         if (error)
2161                 goto out_qm_exit;
2162         return 0;
2163
             /*
              * Unwind chain.  out_remove_dbg_kobj sits outside the #ifdef
              * because the xfs_qm_init() failure path jumps to it in every
              * build.  out_remove_stats_kobj is only jumped to from DEBUG
              * code, so it exists only when DEBUG is defined.
              */
2164  out_qm_exit:
2165         xfs_qm_exit();
2166  out_remove_dbg_kobj:
2167 #ifdef DEBUG
2168         xfs_sysfs_del(&xfs_dbg_kobj);
2169  out_remove_stats_kobj:
2170 #endif
2171         xfs_sysfs_del(&xfsstats.xs_kobj);
2172  out_free_stats:
2173         free_percpu(xfsstats.xs_stats);
2174  out_kset_unregister:
2175         kset_unregister(xfs_kset);
2176  out_sysctl_unregister:
2177         xfs_sysctl_unregister();
2178  out_cleanup_procfs:
2179         xfs_cleanup_procfs();
2180  out_buf_terminate:
2181         xfs_buf_terminate();
2182  out_mru_cache_uninit:
2183         xfs_mru_cache_uninit();
2184  out_destroy_wq:
2185         xfs_destroy_workqueues();
2186  out_destroy_zones:
2187         xfs_destroy_zones();
2188  out:
2189         return error;
2190 }
2191
/*
 * Module exit.  Calls xfs_qm_exit(), unregisters the filesystem type, and
 * then releases the global state in the same order as the error unwinding
 * in init_xfs_fs(): sysfs objects, global stats, kset, sysctl, procfs,
 * xfs_buf_terminate(), MRU cache, workqueues and zones.  Ends with
 * xfs_uuid_table_free().
 */
2192 STATIC void __exit
2193 exit_xfs_fs(void)
2194 {
2195         xfs_qm_exit();
2196         unregister_filesystem(&xfs_fs_type);
             /* The "debug" sysfs object only exists in DEBUG builds. */
2197 #ifdef DEBUG
2198         xfs_sysfs_del(&xfs_dbg_kobj);
2199 #endif
2200         xfs_sysfs_del(&xfsstats.xs_kobj);
2201         free_percpu(xfsstats.xs_stats);
2202         kset_unregister(xfs_kset);
2203         xfs_sysctl_unregister();
2204         xfs_cleanup_procfs();
2205         xfs_buf_terminate();
2206         xfs_mru_cache_uninit();
2207         xfs_destroy_workqueues();
2208         xfs_destroy_zones();
2209         xfs_uuid_table_free();
2210 }
2211
/* Register the module's init and exit functions. */
2212 module_init(init_xfs_fs);
2213 module_exit(exit_xfs_fs);
2214
/* Module metadata: author, description (version and build options), license. */
2215 MODULE_AUTHOR("Silicon Graphics, Inc.");
2216 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2217 MODULE_LICENSE("GPL");