/*
 * Copyright (c) 2000-2003 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"

/*
 * Lock order:
 *
 * ip->i_lock
 *   qi->qi_tree_lock
 *     dquot->q_qlock (xfs_dqlock() and friends)
 *       dquot->q_flush (xfs_dqflock() and friends)
 *       qi->qi_lru_lock
 *
 * If two dquots need to be locked the order is user before group/project,
 * otherwise by the lowest id first; see xfs_dqlock2().
 */
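
/*
 * Illustrative example (not a fixed code path in this file): a caller
 * that wants to flush a dquot attached to a locked inode takes the
 * locks strictly top-down per the order above:
 *
 *	xfs_ilock(ip, XFS_ILOCK_EXCL);
 *	xfs_dqlock(dqp);
 *	xfs_dqflock(dqp);
 */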

struct kmem_zone		*xfs_qm_dqtrxzone;
static struct kmem_zone		*xfs_qm_dqzone;

static struct lock_class_key xfs_dquot_group_class;
static struct lock_class_key xfs_dquot_project_class;

/*
 * This is called to free all the memory associated with a dquot
 */
void
xfs_qm_dqdestroy(
	xfs_dquot_t	*dqp)
{
	ASSERT(list_empty(&dqp->q_lru));

	kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
	mutex_destroy(&dqp->q_qlock);

	XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
	kmem_zone_free(xfs_qm_dqzone, dqp);
}

/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
 */
void
xfs_qm_adjust_dqlimits(
	struct xfs_mount	*mp,
	struct xfs_dquot	*dq)
{
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	struct xfs_disk_dquot	*d = &dq->q_core;
	struct xfs_def_quota	*defq;
	int			prealloc = 0;

	ASSERT(d->d_id);
	defq = xfs_get_defquota(dq, q);

	if (defq->bsoftlimit && !d->d_blk_softlimit) {
		d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
		prealloc = 1;
	}
	if (defq->bhardlimit && !d->d_blk_hardlimit) {
		d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
		prealloc = 1;
	}
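	/*
	 * Note: only the block limits feed the speculative preallocation
	 * watermarks below, so 'prealloc' is not set for the inode and
	 * realtime block limits.
	 */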
	if (defq->isoftlimit && !d->d_ino_softlimit)
		d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
	if (defq->ihardlimit && !d->d_ino_hardlimit)
		d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
	if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
		d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
	if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
		d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);

	if (prealloc)
		xfs_dquot_set_prealloc_limits(dq);
}

/*
 * Check the limits and timers of a dquot and start or reset timers
 * if necessary.
 * This gets called even when quota enforcement is OFF, which makes our
 * life a little less complicated. (We just don't reject any quota
 * reservations in that case.)
 * We also return 0 for the timer values in Q_GETQUOTA calls when
 * enforcement is off.
 * In contrast, warnings are a little different in that they don't
 * 'automatically' get started when limits get exceeded.  They do
 * get reset to zero, however, when we find the count to be under
 * the soft limit (they are only ever set non-zero via userspace).
 */
void
xfs_qm_adjust_dqtimers(
	xfs_mount_t		*mp,
	xfs_disk_dquot_t	*d)
{
	ASSERT(d->d_id);

#ifdef DEBUG
	if (d->d_blk_hardlimit)
		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
		       be64_to_cpu(d->d_blk_hardlimit));
	if (d->d_ino_hardlimit)
		ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
		       be64_to_cpu(d->d_ino_hardlimit));
	if (d->d_rtb_hardlimit)
		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
		       be64_to_cpu(d->d_rtb_hardlimit));
#endif

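	/*
	 * The same pattern repeats below for blocks, inodes and realtime
	 * blocks: arm the timer when usage first exceeds a limit, clear
	 * the warning count while usage is under the soft limit, and
	 * disarm the timer once usage drops back under both limits.
	 */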
	if (!d->d_btimer) {
		if ((d->d_blk_softlimit &&
		     (be64_to_cpu(d->d_bcount) >
		      be64_to_cpu(d->d_blk_softlimit))) ||
		    (d->d_blk_hardlimit &&
		     (be64_to_cpu(d->d_bcount) >
		      be64_to_cpu(d->d_blk_hardlimit)))) {
			d->d_btimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_btimelimit);
		} else {
			d->d_bwarns = 0;
		}
	} else {
		if ((!d->d_blk_softlimit ||
		     (be64_to_cpu(d->d_bcount) <=
		      be64_to_cpu(d->d_blk_softlimit))) &&
		    (!d->d_blk_hardlimit ||
		     (be64_to_cpu(d->d_bcount) <=
		      be64_to_cpu(d->d_blk_hardlimit)))) {
			d->d_btimer = 0;
		}
	}

	if (!d->d_itimer) {
		if ((d->d_ino_softlimit &&
		     (be64_to_cpu(d->d_icount) >
		      be64_to_cpu(d->d_ino_softlimit))) ||
		    (d->d_ino_hardlimit &&
		     (be64_to_cpu(d->d_icount) >
		      be64_to_cpu(d->d_ino_hardlimit)))) {
			d->d_itimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_itimelimit);
		} else {
			d->d_iwarns = 0;
		}
	} else {
		if ((!d->d_ino_softlimit ||
		     (be64_to_cpu(d->d_icount) <=
		      be64_to_cpu(d->d_ino_softlimit))) &&
		    (!d->d_ino_hardlimit ||
		     (be64_to_cpu(d->d_icount) <=
		      be64_to_cpu(d->d_ino_hardlimit)))) {
			d->d_itimer = 0;
		}
	}

	if (!d->d_rtbtimer) {
		if ((d->d_rtb_softlimit &&
		     (be64_to_cpu(d->d_rtbcount) >
		      be64_to_cpu(d->d_rtb_softlimit))) ||
		    (d->d_rtb_hardlimit &&
		     (be64_to_cpu(d->d_rtbcount) >
		      be64_to_cpu(d->d_rtb_hardlimit)))) {
			d->d_rtbtimer = cpu_to_be32(get_seconds() +
					mp->m_quotainfo->qi_rtbtimelimit);
		} else {
			d->d_rtbwarns = 0;
		}
	} else {
		if ((!d->d_rtb_softlimit ||
		     (be64_to_cpu(d->d_rtbcount) <=
		      be64_to_cpu(d->d_rtb_softlimit))) &&
		    (!d->d_rtb_hardlimit ||
		     (be64_to_cpu(d->d_rtbcount) <=
		      be64_to_cpu(d->d_rtb_hardlimit)))) {
			d->d_rtbtimer = 0;
		}
	}
}

/*
 * Initialize a buffer full of dquots and log the whole thing.
 */
STATIC void
xfs_qm_init_dquot_blk(
	xfs_trans_t	*tp,
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	uint		type,
	xfs_buf_t	*bp)
{
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	xfs_dqblk_t	*d;
	xfs_dqid_t	curid;
	int		i;

	ASSERT(tp);
	ASSERT(xfs_buf_islocked(bp));

	d = bp->b_addr;

	/*
	 * ID of the first dquot in the block; IDs are zero-based.
	 */
	curid = id - (id % q->qi_dqperchunk);
	memset(d, 0, BBTOB(q->qi_dqchunklen));
	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
		d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
		d->dd_diskdq.d_id = cpu_to_be32(curid);
		d->dd_diskdq.d_flags = type;
		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
			xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
					 XFS_DQUOT_CRC_OFF);
		}
	}

	xfs_trans_dquot_buf(tp, bp,
			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
			     XFS_BLF_GDQUOT_BUF)));
	xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}

/*
 * Initialize the dynamic speculative preallocation thresholds. The lo/hi
 * watermarks correspond to the soft and hard limits by default. If a soft limit
 * is not specified, we use 95% of the hard limit.
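 *
 * Illustrative numbers (not from the source): with a hard limit of 1000
 * blocks and no soft limit, the low watermark becomes 950 blocks and
 * q_low_space holds 10, 30 and 50 blocks for the 1%, 3% and 5%
 * thresholds respectively.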
 */
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
	uint64_t space;

	dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
	dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
	if (!dqp->q_prealloc_lo_wmark) {
		dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
		do_div(dqp->q_prealloc_lo_wmark, 100);
		dqp->q_prealloc_lo_wmark *= 95;
	}

	space = dqp->q_prealloc_hi_wmark;

	do_div(space, 100);
	dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
	dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
	dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}

/*
 * Allocate a block and fill it with dquots.
 * This is called when the bmapi finds a hole.
 */
STATIC int
xfs_qm_dqalloc(
	xfs_trans_t	**tpp,
	xfs_mount_t	*mp,
	xfs_dquot_t	*dqp,
	xfs_inode_t	*quotip,
	xfs_fileoff_t	offset_fsb,
	xfs_buf_t	**O_bpp)
{
	xfs_fsblock_t	firstblock;
	struct xfs_defer_ops dfops;
	xfs_bmbt_irec_t map;
	int		nmaps, error;
	xfs_buf_t	*bp;
	xfs_trans_t	*tp = *tpp;

	ASSERT(tp != NULL);

	trace_xfs_dqalloc(dqp);

	/*
	 * Initialize the bmap freelist prior to calling bmapi code.
	 */
	xfs_defer_init(&dfops, &firstblock);
	xfs_ilock(quotip, XFS_ILOCK_EXCL);
	/*
	 * Return if this type of quota was turned off while we didn't
	 * have the inode lock.
	 */
	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
		return -ESRCH;
	}

	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
	nmaps = 1;
	error = xfs_bmapi_write(tp, quotip, offset_fsb,
				XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
				&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
				&map, &nmaps, &dfops);
	if (error)
		goto error0;
	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(nmaps == 1);
	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
	       (map.br_startblock != HOLESTARTBLOCK));

	/*
	 * Keep track of the blkno to save a lookup later.
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	/* now we can just get the buffer (there's nothing to read yet) */
	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			       dqp->q_blkno,
			       mp->m_quotainfo->qi_dqchunklen,
			       0);
	if (!bp) {
		error = -ENOMEM;
		goto error1;
	}
	bp->b_ops = &xfs_dquot_buf_ops;

	/*
	 * Make a chunk of dquots out of this buffer and log
	 * the entire thing.
	 */
	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);

	/*
	 * xfs_defer_finish() may commit the current transaction and
	 * start a second transaction if the freelist is not empty.
	 *
	 * Since we still want to modify this buffer, we need to
	 * ensure that the buffer is not released on commit of
	 * the first transaction and ensure the buffer is added to the
	 * second transaction.
	 *
	 * If there is only one transaction then don't stop the buffer
	 * from being released when it commits later on.
	 */
	xfs_trans_bhold(tp, bp);

	error = xfs_defer_finish(tpp, &dfops);
	if (error)
		goto error1;

	/* Transaction was committed? */
	if (*tpp != tp) {
		tp = *tpp;
		xfs_trans_bjoin(tp, bp);
	} else {
		xfs_trans_bhold_release(tp, bp);
	}

	*O_bpp = bp;
	return 0;

error1:
	xfs_defer_cancel(&dfops);
error0:
	xfs_iunlock(quotip, XFS_ILOCK_EXCL);

	return error;
}

STATIC int
xfs_qm_dqrepair(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_dquot	*dqp,
	xfs_dqid_t		firstid,
	struct xfs_buf		**bpp)
{
	int			error;
	struct xfs_disk_dquot	*ddq;
	struct xfs_dqblk	*d;
	int			i;

	/*
	 * Read the buffer without verification so we get the corrupted
	 * buffer returned to us. Make sure we verify it on write, though.
	 */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen,
				   0, bpp, NULL);

	if (error) {
		ASSERT(*bpp == NULL);
		return error;
	}
	(*bpp)->b_ops = &xfs_dquot_buf_ops;

	ASSERT(xfs_buf_islocked(*bpp));
	d = (struct xfs_dqblk *)(*bpp)->b_addr;

	/* Do the actual repair of dquots in this buffer */
	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
		ddq = &d[i].dd_diskdq;
		error = xfs_dqcheck(mp, ddq, firstid + i,
				       dqp->dq_flags & XFS_DQ_ALLTYPES,
				       XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
		if (error) {
			/* repair failed, we're screwed */
			xfs_trans_brelse(tp, *bpp);
			return -EIO;
		}
	}

	return 0;
}

/*
 * Maps a dquot to the buffer containing its on-disk version.
 * This returns a ptr to the buffer containing the on-disk dquot
 * in the bpp param, and a ptr to the on-disk dquot within that buffer.
 */
STATIC int
xfs_qm_dqtobp(
	xfs_trans_t		**tpp,
	xfs_dquot_t		*dqp,
	xfs_disk_dquot_t	**O_ddpp,
	xfs_buf_t		**O_bpp,
	uint			flags)
{
	struct xfs_bmbt_irec	map;
	int			nmaps = 1, error;
	struct xfs_buf		*bp;
	struct xfs_inode	*quotip;
	struct xfs_mount	*mp = dqp->q_mount;
	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id);
	struct xfs_trans	*tp = (tpp ? *tpp : NULL);
	uint			lock_mode;

	quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
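	/*
	 * Worked example with made-up numbers: with 30 dquots per chunk,
	 * id 123 maps to the chunk at file offset 123 / 30 = 4; the dquot
	 * sits at index 123 % 30 = 3 within that chunk (see the
	 * q_bufoffset calculation below).
	 */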

	lock_mode = xfs_ilock_data_map_shared(quotip);
	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
		/*
		 * Return if this type of quota was turned off while we
		 * didn't hold the quota inode lock.
		 */
		xfs_iunlock(quotip, lock_mode);
		return -ESRCH;
	}

	/*
	 * Find the block map; no allocations yet.
	 */
	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);

	xfs_iunlock(quotip, lock_mode);
	if (error)
		return error;

	ASSERT(nmaps == 1);
	ASSERT(map.br_blockcount == 1);

	/*
	 * Offset of the dquot in the (fixed size) dquot chunk.
	 */
	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
		sizeof(xfs_dqblk_t);

	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
	if (map.br_startblock == HOLESTARTBLOCK) {
		/*
		 * We don't allocate unless we're asked to.
		 */
		if (!(flags & XFS_QMOPT_DQALLOC))
			return -ENOENT;

		ASSERT(tp);
		error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
					dqp->q_fileoffset, &bp);
		if (error)
			return error;
		tp = *tpp;
	} else {
		trace_xfs_dqtobp_read(dqp);

		/*
		 * Store the blkno etc. so that we don't have to do the
		 * mapping all the time.
		 */
		dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

		error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
					   dqp->q_blkno,
					   mp->m_quotainfo->qi_dqchunklen,
					   0, &bp, &xfs_dquot_buf_ops);

		if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
			xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
						mp->m_quotainfo->qi_dqperchunk;
			ASSERT(bp == NULL);
			error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
		}

		if (error) {
			ASSERT(bp == NULL);
			return error;
		}
	}

	ASSERT(xfs_buf_islocked(bp));
	*O_bpp = bp;
	*O_ddpp = bp->b_addr + dqp->q_bufoffset;

	return 0;
}

/*
 * Read in the on-disk dquot using dqtobp() then copy it to an incore version,
 * and release the buffer immediately.
 *
 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed.
 */
int
xfs_qm_dqread(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	uint			flags,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_dquot	*dqp;
	struct xfs_disk_dquot	*ddqp;
	struct xfs_buf		*bp;
	struct xfs_trans	*tp = NULL;
	int			error;

	dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

	dqp->dq_flags = type;
	dqp->q_core.d_id = cpu_to_be32(id);
	dqp->q_mount = mp;
	INIT_LIST_HEAD(&dqp->q_lru);
	mutex_init(&dqp->q_qlock);
	init_waitqueue_head(&dqp->q_pinwait);

	/*
	 * Because we want to use a counting completion, complete
	 * the flush completion once to allow a single access to
	 * the flush completion without blocking.
	 */
	init_completion(&dqp->q_flush);
	complete(&dqp->q_flush);
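	/*
	 * With the completion primed this way, the first xfs_dqflock()
	 * returns immediately and subsequent callers wait until
	 * xfs_dqfunlock() completes it again - in effect a flush lock
	 * that can be waited on.
	 */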

	/*
	 * Make sure group quotas have a different lock class than user
	 * quotas.
	 */
	switch (type) {
	case XFS_DQ_USER:
		/* uses the default lock class */
		break;
	case XFS_DQ_GROUP:
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
		break;
	case XFS_DQ_PROJ:
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
		break;
	default:
		ASSERT(0);
		break;
	}

	XFS_STATS_INC(mp, xs_qm_dquot);

	trace_xfs_dqread(dqp);

	if (flags & XFS_QMOPT_DQALLOC) {
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
				XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
		if (error)
			goto error0;
	}

	/*
	 * Get a pointer to the on-disk dquot and the buffer containing it;
	 * dqp already knows its own type (GROUP/USER).
	 */
	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
	if (error) {
		/*
		 * This can happen if quotas got turned off (ESRCH),
		 * or if the dquot didn't exist on disk and we ask to
		 * allocate (ENOENT).
		 */
		trace_xfs_dqread_fail(dqp);
		goto error1;
	}

	/* copy everything from disk dquot to the incore dquot */
	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
	xfs_qm_dquot_logitem_init(dqp);

	/*
	 * Reservation counters are defined as reservation plus current usage
	 * to avoid having to add every time.
	 */
	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);

	/* initialize the dquot speculative prealloc thresholds */
	xfs_dquot_set_prealloc_limits(dqp);

	/* Mark the buf so that this will stay incore a little longer */
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * We got the buffer with a call to xfs_trans_read_buf() (in
	 * dqtobp()), so we need to release it with xfs_trans_brelse().
	 * The strategy here is identical to that of inodes; we lock
	 * the dquot in xfs_qm_dqget() before making it accessible to
	 * others. This is because dquots, like inodes, need a good level of
	 * concurrency, and we don't want to take locks on entire buffers
	 * for dquot accesses.
	 * Note also that the dquot buffer may even be dirty at this point, if
	 * this particular dquot was repaired. We still aren't afraid to
	 * brelse it because we have the changes incore.
	 */
	ASSERT(xfs_buf_islocked(bp));
	xfs_trans_brelse(tp, bp);

	if (tp) {
		error = xfs_trans_commit(tp);
		if (error)
			goto error0;
	}

	*O_dqpp = dqp;
	return error;

error1:
	if (tp)
		xfs_trans_cancel(tp);
error0:
	xfs_qm_dqdestroy(dqp);
	*O_dqpp = NULL;
	return error;
}

/*
 * Advance to the next id in the current chunk, or if at the
 * end of the chunk, skip ahead to the first id in the next allocated
 * chunk by scanning the quota file's in-core extent list.
 */
static int
xfs_dq_get_next_id(
	struct xfs_mount	*mp,
	uint			type,
	xfs_dqid_t		*id)
{
	struct xfs_inode	*quotip = xfs_quota_inode(mp, type);
	xfs_dqid_t		next_id = *id + 1; /* simple advance */
	uint			lock_flags;
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	cur;
	xfs_fsblock_t		start;
	int			error = 0;

	/* If we'd wrap past the max ID, stop */
	if (next_id < *id)
		return -ENOENT;

	/* If the new ID is within the current chunk, advancing it sufficed */
	if (next_id % mp->m_quotainfo->qi_dqperchunk) {
		*id = next_id;
		return 0;
	}

	/* Nope, next_id is now past the current chunk, so find the next one */
	start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;

	lock_flags = xfs_ilock_data_map_shared(quotip);
	if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
		if (error) {
			/* don't leak the ilock on the error path */
			xfs_iunlock(quotip, lock_flags);
			return error;
		}
	}

	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &cur, &got)) {
		/* contiguous chunk, bump startoff for the id calculation */
		if (got.br_startoff < start)
			got.br_startoff = start;
		*id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
	} else {
		error = -ENOENT;
	}

	xfs_iunlock(quotip, lock_flags);

	return error;
}

/*
 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return
 * a locked dquot, doing an allocation (if requested) as needed.
 * When both an inode and an id are given, the inode's id takes precedence.
 * That is, if the id changes while we don't hold the ilock inside this
 * function, the new dquot is returned, not necessarily the one requested
 * in the id argument.
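 *
 * Illustrative call (not from this file): attach the user dquot for an
 * inode, allocating on-disk space for it if necessary:
 *
 *	error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
 *			     XFS_QMOPT_DQALLOC, &dqp);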
 */
int
xfs_qm_dqget(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,	  /* locked inode (optional) */
	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN, DQNEXT */
	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
{
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
	struct xfs_dquot	*dqp;
	int			error;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
	if ((!XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
	    (!XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
	    (!XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
		return -ESRCH;
	}

	ASSERT(type == XFS_DQ_USER ||
	       type == XFS_DQ_PROJ ||
	       type == XFS_DQ_GROUP);
	if (ip) {
		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
		ASSERT(xfs_inode_dquot(ip, type) == NULL);
	}

restart:
	mutex_lock(&qi->qi_tree_lock);
	dqp = radix_tree_lookup(tree, id);
	if (dqp) {
		xfs_dqlock(dqp);
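		/*
		 * A dquot that is being torn down cannot be reused;
		 * back off and retry the lookup until it is gone.
		 */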
		if (dqp->dq_flags & XFS_DQ_FREEING) {
			xfs_dqunlock(dqp);
			mutex_unlock(&qi->qi_tree_lock);
			trace_xfs_dqget_freeing(dqp);
			delay(1);
			goto restart;
		}

		/* uninitialized/unused dquot found in radix tree, keep looking */
		if (flags & XFS_QMOPT_DQNEXT) {
			if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
				xfs_dqunlock(dqp);
				mutex_unlock(&qi->qi_tree_lock);
				error = xfs_dq_get_next_id(mp, type, &id);
				if (error)
					return error;
				goto restart;
			}
		}

		dqp->q_nrefs++;
		mutex_unlock(&qi->qi_tree_lock);

		trace_xfs_dqget_hit(dqp);
		XFS_STATS_INC(mp, xs_qm_dqcachehits);
		*O_dqpp = dqp;
		return 0;
	}
	mutex_unlock(&qi->qi_tree_lock);
	XFS_STATS_INC(mp, xs_qm_dqcachemisses);

	/*
	 * Dquot cache miss. We don't want to keep the inode lock across
	 * a (potential) disk read. Also we don't want to deal with the lock
	 * ordering between the quota inode and this inode. OTOH, dropping
	 * the inode lock here means dealing with a chown that can happen
	 * before we re-acquire the lock.
	 */
	if (ip)
		xfs_iunlock(ip, XFS_ILOCK_EXCL);

	error = xfs_qm_dqread(mp, id, type, flags, &dqp);

	if (ip)
		xfs_ilock(ip, XFS_ILOCK_EXCL);

	/* If we are asked to find the next active id, keep looking */
	if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
		error = xfs_dq_get_next_id(mp, type, &id);
		if (!error)
			goto restart;
	}

	if (error)
		return error;

	if (ip) {
		/*
		 * A dquot could be attached to this inode by now, since
		 * we dropped the ilock.
		 */
		if (xfs_this_quota_on(mp, type)) {
			struct xfs_dquot	*dqp1;

			dqp1 = xfs_inode_dquot(ip, type);
			if (dqp1) {
				xfs_qm_dqdestroy(dqp);
				dqp = dqp1;
				xfs_dqlock(dqp);
				goto dqret;
			}
		} else {
			/* inode stays locked on return */
			xfs_qm_dqdestroy(dqp);
			return -ESRCH;
		}
	}

	mutex_lock(&qi->qi_tree_lock);
	error = radix_tree_insert(tree, id, dqp);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);

		/*
		 * Duplicate found. Just throw away the new dquot and start
		 * over.
		 */
		mutex_unlock(&qi->qi_tree_lock);
		trace_xfs_dqget_dup(dqp);
		xfs_qm_dqdestroy(dqp);
		XFS_STATS_INC(mp, xs_qm_dquot_dups);
		goto restart;
	}

	/*
	 * We return a locked dquot to the caller, with a reference taken.
	 */
	xfs_dqlock(dqp);
	dqp->q_nrefs = 1;

	qi->qi_dquots++;
	mutex_unlock(&qi->qi_tree_lock);

	/* If we are asked to find the next active id, keep looking */
	if (flags & XFS_QMOPT_DQNEXT) {
		if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
			xfs_qm_dqput(dqp);
			error = xfs_dq_get_next_id(mp, type, &id);
			if (error)
				return error;
			goto restart;
		}
	}

 dqret:
	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
	trace_xfs_dqget_miss(dqp);
	*O_dqpp = dqp;
	return 0;
}

/*
 * Release a reference to the dquot (decrement ref-count) and unlock it.
 *
 * If there is a group quota attached to this dquot, carefully release that
 * too without tripping over deadlocks'n'stuff.
 */
void
xfs_qm_dqput(
	struct xfs_dquot	*dqp)
{
	ASSERT(dqp->q_nrefs > 0);
	ASSERT(XFS_DQ_IS_LOCKED(dqp));

	trace_xfs_dqput(dqp);

	if (--dqp->q_nrefs == 0) {
		struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;
		trace_xfs_dqput_free(dqp);

		if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
			XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
	}
	xfs_dqunlock(dqp);
}

/*
 * Release a dquot. Flush it if dirty, then dqput() it.
 * The dquot must not be locked.
 */
void
xfs_qm_dqrele(
	xfs_dquot_t	*dqp)
{
	if (!dqp)
		return;

	trace_xfs_dqrele(dqp);

	xfs_dqlock(dqp);
	/*
	 * We don't care to flush it if the dquot is dirty here.
	 * That would create stutters that we want to avoid.
	 * Instead we do a delayed write when we try to reclaim
	 * a dirty dquot. Also xfs_sync will take part of the burden...
	 */
	xfs_qm_dqput(dqp);
}

/*
 * This is the dquot flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the dquot is
 * flushed to disk.  It is responsible for removing the dquot logitem
 * from the AIL if it has not been re-logged, and unlocking the dquot's
 * flush lock. This behavior is very similar to that of inodes.
 */
STATIC void
xfs_qm_dqflush_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	xfs_dq_logitem_t	*qip = (struct xfs_dq_logitem *)lip;
	xfs_dquot_t		*dqp = qip->qli_dquot;
	struct xfs_ail		*ailp = lip->li_ailp;

	/*
	 * We only want to pull the item from the AIL if its
	 * location in the log has not changed since we started the flush.
	 * Thus, we only bother if the dquot's lsn has
	 * not changed. First we check the lsn outside the lock
	 * since it's cheaper, and then we recheck while
	 * holding the lock before removing the dquot from the AIL.
	 */
	if ((lip->li_flags & XFS_LI_IN_AIL) &&
	    lip->li_lsn == qip->qli_flush_lsn) {

		/* xfs_trans_ail_delete() drops the AIL lock. */
		spin_lock(&ailp->xa_lock);
		if (lip->li_lsn == qip->qli_flush_lsn)
			xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
		else
			spin_unlock(&ailp->xa_lock);
	}

	/*
	 * Release the dq's flush lock since we're done with it.
	 */
	xfs_dqfunlock(dqp);
}

/*
 * Write a modified dquot to disk.
 * The dquot must be locked and the flush lock must be held by the caller.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim. The dquot is still locked on return. This behavior is
 * identical to that of inodes.
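 *
 * Illustrative caller pattern (not from this file): flush a dirty dquot
 * and queue the buffer for delayed write:
 *
 *	xfs_dqlock(dqp);
 *	xfs_dqflock(dqp);
 *	error = xfs_qm_dqflush(dqp, &bp);
 *	if (!error) {
 *		xfs_buf_delwri_queue(bp, &buffer_list);
 *		xfs_buf_relse(bp);
 *	}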
 */
int
xfs_qm_dqflush(
	struct xfs_dquot	*dqp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_buf		*bp;
	struct xfs_disk_dquot	*ddqp;
	int			error;

	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	ASSERT(!completion_done(&dqp->q_flush));

	trace_xfs_dqflush(dqp);

	*bpp = NULL;

	xfs_qm_dqunpin_wait(dqp);

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly. If that's the case we must not write this dquot
	 * to disk, because the log record didn't make it to disk.
	 *
	 * We also have to remove the log item from the AIL in this case,
	 * as we wait for an empty AIL as part of the unmount process.
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		struct xfs_log_item	*lip = &dqp->q_logitem.qli_item;
		dqp->dq_flags &= ~XFS_DQ_DIRTY;

		xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);

		error = -EIO;
		goto out_unlock;
	}

	/*
	 * Get the buffer containing the on-disk dquot.
	 */
	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
				   &xfs_dquot_buf_ops);
	if (error)
		goto out_unlock;

	/*
	 * Calculate the location of the dquot inside the buffer.
	 */
	ddqp = bp->b_addr + dqp->q_bufoffset;

	/*
	 * A simple sanity check in case we got a corrupted dquot.
	 */
	error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
			   XFS_QMOPT_DOWARN, "dqflush (incore copy)");
	if (error) {
		xfs_buf_relse(bp);
		xfs_dqfunlock(dqp);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return -EIO;
	}

	/* This is the only portion of data that needs to persist */
	memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));

	/*
	 * Clear the dirty field and remember the flush lsn for later use.
	 */
	dqp->dq_flags &= ~XFS_DQ_DIRTY;

	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
					&dqp->q_logitem.qli_item.li_lsn);

	/*
	 * Copy the lsn into the on-disk dquot now while we have the in-memory
	 * dquot here. This can't be done later in the write verifier as we
	 * can't get access to the log item at that point in time.
	 *
	 * We also calculate the CRC here so that the on-disk dquot in the
	 * buffer always has a valid CRC. This ensures there is no possibility
	 * of a dquot without an up-to-date CRC getting to disk.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;

		dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
				 XFS_DQUOT_CRC_OFF);
	}

	/*
	 * Attach an iodone routine so that we can remove this dquot from the
	 * AIL and release the flush lock once the dquot is synced to disk.
	 */
	xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
				  &dqp->q_logitem.qli_item);

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (xfs_buf_ispinned(bp)) {
		trace_xfs_dqflush_force(dqp);
		xfs_log_force(mp, 0);
	}

	trace_xfs_dqflush_done(dqp);
	*bpp = bp;
	return 0;

out_unlock:
	xfs_dqfunlock(dqp);
	return error;
}

/*
 * Lock two xfs_dquot structures.
 *
 * To avoid deadlocks we always lock the quota structure with
 * the lower id first.
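 *
 * Illustrative usage (not from this file): to modify the user and group
 * dquots of an inode together:
 *
 *	xfs_dqlock2(udqp, gdqp);
 *	... modify both dquots ...
 *	xfs_dqunlock(udqp);
 *	xfs_dqunlock(gdqp);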
 */
void
xfs_dqlock2(
	xfs_dquot_t	*d1,
	xfs_dquot_t	*d2)
{
	if (d1 && d2) {
		ASSERT(d1 != d2);
		if (be32_to_cpu(d1->q_core.d_id) >
		    be32_to_cpu(d2->q_core.d_id)) {
			mutex_lock(&d2->q_qlock);
			mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
		} else {
			mutex_lock(&d1->q_qlock);
			mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
		}
	} else if (d1) {
		mutex_lock(&d1->q_qlock);
	} else if (d2) {
		mutex_lock(&d2->q_qlock);
	}
}

int __init
xfs_qm_init(void)
{
	xfs_qm_dqzone =
		kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
	if (!xfs_qm_dqzone)
		goto out;

	xfs_qm_dqtrxzone =
		kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
	if (!xfs_qm_dqtrxzone)
		goto out_free_dqzone;

	return 0;

out_free_dqzone:
	kmem_zone_destroy(xfs_qm_dqzone);
out:
	return -ENOMEM;
}

void
xfs_qm_exit(void)
{
	kmem_zone_destroy(xfs_qm_dqtrxzone);
	kmem_zone_destroy(xfs_qm_dqzone);
}