[XFS] Fix inode size update before data write in xfs_setattr
[sfrench/cifs-2.6.git] / fs / xfs / xfs_vnodeops.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 #include "xfs.h"
20 #include "xfs_fs.h"
21 #include "xfs_types.h"
22 #include "xfs_bit.h"
23 #include "xfs_log.h"
24 #include "xfs_inum.h"
25 #include "xfs_trans.h"
26 #include "xfs_sb.h"
27 #include "xfs_ag.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_da_btree.h"
32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38 #include "xfs_inode.h"
39 #include "xfs_inode_item.h"
40 #include "xfs_itable.h"
41 #include "xfs_btree.h"
42 #include "xfs_ialloc.h"
43 #include "xfs_alloc.h"
44 #include "xfs_bmap.h"
45 #include "xfs_attr.h"
46 #include "xfs_rw.h"
47 #include "xfs_error.h"
48 #include "xfs_quota.h"
49 #include "xfs_utils.h"
50 #include "xfs_rtalloc.h"
51 #include "xfs_refcache.h"
52 #include "xfs_trans_space.h"
53 #include "xfs_log_priv.h"
54 #include "xfs_filestream.h"
55
56 STATIC int
57 xfs_open(
58         bhv_desc_t      *bdp,
59         cred_t          *credp)
60 {
61         int             mode;
62         bhv_vnode_t     *vp = BHV_TO_VNODE(bdp);
63         xfs_inode_t     *ip = XFS_BHVTOI(bdp);
64
65         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
66                 return XFS_ERROR(EIO);
67
68         /*
69          * If it's a directory with any blocks, read-ahead block 0
70          * as we're almost certain to have the next operation be a read there.
71          */
72         if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) {
73                 mode = xfs_ilock_map_shared(ip);
74                 if (ip->i_d.di_nextents > 0)
75                         (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
76                 xfs_iunlock(ip, mode);
77         }
78         return 0;
79 }
80
81 /*
82  * xfs_getattr
83  */
84 STATIC int
85 xfs_getattr(
86         bhv_desc_t      *bdp,
87         bhv_vattr_t     *vap,
88         int             flags,
89         cred_t          *credp)
90 {
91         xfs_inode_t     *ip;
92         xfs_mount_t     *mp;
93         bhv_vnode_t     *vp;
94
95         vp  = BHV_TO_VNODE(bdp);
96         vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
97
98         ip = XFS_BHVTOI(bdp);
99         mp = ip->i_mount;
100
101         if (XFS_FORCED_SHUTDOWN(mp))
102                 return XFS_ERROR(EIO);
103
104         if (!(flags & ATTR_LAZY))
105                 xfs_ilock(ip, XFS_ILOCK_SHARED);
106
107         vap->va_size = XFS_ISIZE(ip);
108         if (vap->va_mask == XFS_AT_SIZE)
109                 goto all_done;
110
111         vap->va_nblocks =
112                 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
113         vap->va_nodeid = ip->i_ino;
114 #if XFS_BIG_INUMS
115         vap->va_nodeid += mp->m_inoadd;
116 #endif
117         vap->va_nlink = ip->i_d.di_nlink;
118
119         /*
120          * Quick exit for non-stat callers
121          */
122         if ((vap->va_mask &
123             ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
124               XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
125                 goto all_done;
126
127         /*
128          * Copy from in-core inode.
129          */
130         vap->va_mode = ip->i_d.di_mode;
131         vap->va_uid = ip->i_d.di_uid;
132         vap->va_gid = ip->i_d.di_gid;
133         vap->va_projid = ip->i_d.di_projid;
134
135         /*
136          * Check vnode type block/char vs. everything else.
137          */
138         switch (ip->i_d.di_mode & S_IFMT) {
139         case S_IFBLK:
140         case S_IFCHR:
141                 vap->va_rdev = ip->i_df.if_u2.if_rdev;
142                 vap->va_blocksize = BLKDEV_IOSIZE;
143                 break;
144         default:
145                 vap->va_rdev = 0;
146
147                 if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
148                         vap->va_blocksize = xfs_preferred_iosize(mp);
149                 } else {
150
151                         /*
152                          * If the file blocks are being allocated from a
153                          * realtime partition, then return the inode's
154                          * realtime extent size or the realtime volume's
155                          * extent size.
156                          */
157                         vap->va_blocksize =
158                                 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
159                 }
160                 break;
161         }
162
163         vn_atime_to_timespec(vp, &vap->va_atime);
164         vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
165         vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
166         vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
167         vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
168
169         /*
170          * Exit for stat callers.  See if any of the rest of the fields
171          * to be filled in are needed.
172          */
173         if ((vap->va_mask &
174              (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
175               XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
176                 goto all_done;
177
178         /*
179          * Convert di_flags to xflags.
180          */
181         vap->va_xflags = xfs_ip2xflags(ip);
182
183         /*
184          * Exit for inode revalidate.  See if any of the rest of
185          * the fields to be filled in are needed.
186          */
187         if ((vap->va_mask &
188              (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
189               XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
190                 goto all_done;
191
192         vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
193         vap->va_nextents =
194                 (ip->i_df.if_flags & XFS_IFEXTENTS) ?
195                         ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
196                         ip->i_d.di_nextents;
197         if (ip->i_afp)
198                 vap->va_anextents =
199                         (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
200                                 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
201                                  ip->i_d.di_anextents;
202         else
203                 vap->va_anextents = 0;
204         vap->va_gen = ip->i_d.di_gen;
205
206  all_done:
207         if (!(flags & ATTR_LAZY))
208                 xfs_iunlock(ip, XFS_ILOCK_SHARED);
209         return 0;
210 }
211
212
213 /*
214  * xfs_setattr
215  */
216 int
217 xfs_setattr(
218         bhv_desc_t              *bdp,
219         bhv_vattr_t             *vap,
220         int                     flags,
221         cred_t                  *credp)
222 {
223         xfs_inode_t             *ip;
224         xfs_trans_t             *tp;
225         xfs_mount_t             *mp;
226         int                     mask;
227         int                     code;
228         uint                    lock_flags;
229         uint                    commit_flags=0;
230         uid_t                   uid=0, iuid=0;
231         gid_t                   gid=0, igid=0;
232         int                     timeflags = 0;
233         bhv_vnode_t             *vp;
234         xfs_prid_t              projid=0, iprojid=0;
235         int                     mandlock_before, mandlock_after;
236         struct xfs_dquot        *udqp, *gdqp, *olddquot1, *olddquot2;
237         int                     file_owner;
238         int                     need_iolock = 1;
239
240         vp = BHV_TO_VNODE(bdp);
241         vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
242
243         if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
244                 return XFS_ERROR(EROFS);
245
246         /*
247          * Cannot set certain attributes.
248          */
249         mask = vap->va_mask;
250         if (mask & XFS_AT_NOSET) {
251                 return XFS_ERROR(EINVAL);
252         }
253
254         ip = XFS_BHVTOI(bdp);
255         mp = ip->i_mount;
256
257         if (XFS_FORCED_SHUTDOWN(mp))
258                 return XFS_ERROR(EIO);
259
260         /*
261          * Timestamps do not need to be logged and hence do not
262          * need to be done within a transaction.
263          */
264         if (mask & XFS_AT_UPDTIMES) {
265                 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
266                 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
267                             ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
268                             ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
269                 xfs_ichgtime(ip, timeflags);
270                 return 0;
271         }
272
273         olddquot1 = olddquot2 = NULL;
274         udqp = gdqp = NULL;
275
276         /*
277          * If disk quotas is on, we make sure that the dquots do exist on disk,
278          * before we start any other transactions. Trying to do this later
279          * is messy. We don't care to take a readlock to look at the ids
280          * in inode here, because we can't hold it across the trans_reserve.
281          * If the IDs do change before we take the ilock, we're covered
282          * because the i_*dquot fields will get updated anyway.
283          */
284         if (XFS_IS_QUOTA_ON(mp) &&
285             (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
286                 uint    qflags = 0;
287
288                 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
289                         uid = vap->va_uid;
290                         qflags |= XFS_QMOPT_UQUOTA;
291                 } else {
292                         uid = ip->i_d.di_uid;
293                 }
294                 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
295                         gid = vap->va_gid;
296                         qflags |= XFS_QMOPT_GQUOTA;
297                 }  else {
298                         gid = ip->i_d.di_gid;
299                 }
300                 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
301                         projid = vap->va_projid;
302                         qflags |= XFS_QMOPT_PQUOTA;
303                 }  else {
304                         projid = ip->i_d.di_projid;
305                 }
306                 /*
307                  * We take a reference when we initialize udqp and gdqp,
308                  * so it is important that we never blindly double trip on
309                  * the same variable. See xfs_create() for an example.
310                  */
311                 ASSERT(udqp == NULL);
312                 ASSERT(gdqp == NULL);
313                 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
314                                          &udqp, &gdqp);
315                 if (code)
316                         return code;
317         }
318
319         /*
320          * For the other attributes, we acquire the inode lock and
321          * first do an error checking pass.
322          */
323         tp = NULL;
324         lock_flags = XFS_ILOCK_EXCL;
325         if (flags & ATTR_NOLOCK)
326                 need_iolock = 0;
327         if (!(mask & XFS_AT_SIZE)) {
328                 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
329                     (mp->m_flags & XFS_MOUNT_WSYNC)) {
330                         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
331                         commit_flags = 0;
332                         if ((code = xfs_trans_reserve(tp, 0,
333                                                      XFS_ICHANGE_LOG_RES(mp), 0,
334                                                      0, 0))) {
335                                 lock_flags = 0;
336                                 goto error_return;
337                         }
338                 }
339         } else {
340                 if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
341                     !(flags & ATTR_DMI)) {
342                         int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
343                         code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
344                                 vap->va_size, 0, dmflags, NULL);
345                         if (code) {
346                                 lock_flags = 0;
347                                 goto error_return;
348                         }
349                 }
350                 if (need_iolock)
351                         lock_flags |= XFS_IOLOCK_EXCL;
352         }
353
354         xfs_ilock(ip, lock_flags);
355
356         /* boolean: are we the file owner? */
357         file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
358
359         /*
360          * Change various properties of a file.
361          * Only the owner or users with CAP_FOWNER
362          * capability may do these things.
363          */
364         if (mask &
365             (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
366              XFS_AT_GID|XFS_AT_PROJID)) {
367                 /*
368                  * CAP_FOWNER overrides the following restrictions:
369                  *
370                  * The user ID of the calling process must be equal
371                  * to the file owner ID, except in cases where the
372                  * CAP_FSETID capability is applicable.
373                  */
374                 if (!file_owner && !capable(CAP_FOWNER)) {
375                         code = XFS_ERROR(EPERM);
376                         goto error_return;
377                 }
378
379                 /*
380                  * CAP_FSETID overrides the following restrictions:
381                  *
382                  * The effective user ID of the calling process shall match
383                  * the file owner when setting the set-user-ID and
384                  * set-group-ID bits on that file.
385                  *
386                  * The effective group ID or one of the supplementary group
387                  * IDs of the calling process shall match the group owner of
388                  * the file when setting the set-group-ID bit on that file
389                  */
390                 if (mask & XFS_AT_MODE) {
391                         mode_t m = 0;
392
393                         if ((vap->va_mode & S_ISUID) && !file_owner)
394                                 m |= S_ISUID;
395                         if ((vap->va_mode & S_ISGID) &&
396                             !in_group_p((gid_t)ip->i_d.di_gid))
397                                 m |= S_ISGID;
398 #if 0
399                         /* Linux allows this, Irix doesn't. */
400                         if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
401                                 m |= S_ISVTX;
402 #endif
403                         if (m && !capable(CAP_FSETID))
404                                 vap->va_mode &= ~m;
405                 }
406         }
407
408         /*
409          * Change file ownership.  Must be the owner or privileged.
410          * If the system was configured with the "restricted_chown"
411          * option, the owner is not permitted to give away the file,
412          * and can change the group id only to a group of which he
413          * or she is a member.
414          */
415         if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
416                 /*
417                  * These IDs could have changed since we last looked at them.
418                  * But, we're assured that if the ownership did change
419                  * while we didn't have the inode locked, inode's dquot(s)
420                  * would have changed also.
421                  */
422                 iuid = ip->i_d.di_uid;
423                 iprojid = ip->i_d.di_projid;
424                 igid = ip->i_d.di_gid;
425                 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
426                 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
427                 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
428                          iprojid;
429
430                 /*
431                  * CAP_CHOWN overrides the following restrictions:
432                  *
433                  * If _POSIX_CHOWN_RESTRICTED is defined, this capability
434                  * shall override the restriction that a process cannot
435                  * change the user ID of a file it owns and the restriction
436                  * that the group ID supplied to the chown() function
437                  * shall be equal to either the group ID or one of the
438                  * supplementary group IDs of the calling process.
439                  */
440                 if (restricted_chown &&
441                     (iuid != uid || (igid != gid &&
442                                      !in_group_p((gid_t)gid))) &&
443                     !capable(CAP_CHOWN)) {
444                         code = XFS_ERROR(EPERM);
445                         goto error_return;
446                 }
447                 /*
448                  * Do a quota reservation only if uid/projid/gid is actually
449                  * going to change.
450                  */
451                 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
452                     (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
453                     (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
454                         ASSERT(tp);
455                         code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
456                                                 capable(CAP_FOWNER) ?
457                                                 XFS_QMOPT_FORCE_RES : 0);
458                         if (code)       /* out of quota */
459                                 goto error_return;
460                 }
461         }
462
463         /*
464          * Truncate file.  Must have write permission and not be a directory.
465          */
466         if (mask & XFS_AT_SIZE) {
467                 /* Short circuit the truncate case for zero length files */
468                 if ((vap->va_size == 0) &&
469                    (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
470                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
471                         lock_flags &= ~XFS_ILOCK_EXCL;
472                         if (mask & XFS_AT_CTIME)
473                                 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
474                         code = 0;
475                         goto error_return;
476                 }
477
478                 if (VN_ISDIR(vp)) {
479                         code = XFS_ERROR(EISDIR);
480                         goto error_return;
481                 } else if (!VN_ISREG(vp)) {
482                         code = XFS_ERROR(EINVAL);
483                         goto error_return;
484                 }
485                 /*
486                  * Make sure that the dquots are attached to the inode.
487                  */
488                 if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
489                         goto error_return;
490         }
491
492         /*
493          * Change file access or modified times.
494          */
495         if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
496                 if (!file_owner) {
497                         if ((flags & ATTR_UTIME) &&
498                             !capable(CAP_FOWNER)) {
499                                 code = XFS_ERROR(EPERM);
500                                 goto error_return;
501                         }
502                 }
503         }
504
505         /*
506          * Change extent size or realtime flag.
507          */
508         if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
509                 /*
510                  * Can't change extent size if any extents are allocated.
511                  */
512                 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
513                     ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
514                      vap->va_extsize) ) {
515                         code = XFS_ERROR(EINVAL);       /* EFBIG? */
516                         goto error_return;
517                 }
518
519                 /*
520                  * Can't change realtime flag if any extents are allocated.
521                  */
522                 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
523                     (mask & XFS_AT_XFLAGS) &&
524                     (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
525                     (vap->va_xflags & XFS_XFLAG_REALTIME)) {
526                         code = XFS_ERROR(EINVAL);       /* EFBIG? */
527                         goto error_return;
528                 }
529                 /*
530                  * Extent size must be a multiple of the appropriate block
531                  * size, if set at all.
532                  */
533                 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
534                         xfs_extlen_t    size;
535
536                         if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
537                             ((mask & XFS_AT_XFLAGS) &&
538                             (vap->va_xflags & XFS_XFLAG_REALTIME))) {
539                                 size = mp->m_sb.sb_rextsize <<
540                                        mp->m_sb.sb_blocklog;
541                         } else {
542                                 size = mp->m_sb.sb_blocksize;
543                         }
544                         if (vap->va_extsize % size) {
545                                 code = XFS_ERROR(EINVAL);
546                                 goto error_return;
547                         }
548                 }
549                 /*
550                  * If realtime flag is set then must have realtime data.
551                  */
552                 if ((mask & XFS_AT_XFLAGS) &&
553                     (vap->va_xflags & XFS_XFLAG_REALTIME)) {
554                         if ((mp->m_sb.sb_rblocks == 0) ||
555                             (mp->m_sb.sb_rextsize == 0) ||
556                             (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
557                                 code = XFS_ERROR(EINVAL);
558                                 goto error_return;
559                         }
560                 }
561
562                 /*
563                  * Can't modify an immutable/append-only file unless
564                  * we have appropriate permission.
565                  */
566                 if ((mask & XFS_AT_XFLAGS) &&
567                     (ip->i_d.di_flags &
568                                 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
569                      (vap->va_xflags &
570                                 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
571                     !capable(CAP_LINUX_IMMUTABLE)) {
572                         code = XFS_ERROR(EPERM);
573                         goto error_return;
574                 }
575         }
576
577         /*
578          * Now we can make the changes.  Before we join the inode
579          * to the transaction, if XFS_AT_SIZE is set then take care of
580          * the part of the truncation that must be done without the
581          * inode lock.  This needs to be done before joining the inode
582          * to the transaction, because the inode cannot be unlocked
583          * once it is a part of the transaction.
584          */
585         if (mask & XFS_AT_SIZE) {
586                 code = 0;
587                 if ((vap->va_size > ip->i_size) &&
588                     (flags & ATTR_NOSIZETOK) == 0) {
589                         code = xfs_igrow_start(ip, vap->va_size, credp);
590                 }
591                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
592
593                 /*
594                  * We are going to log the inode size change in this
595                  * transaction so any previous writes that are beyond the on
596                  * disk EOF and the new EOF that have not been written out need
597                  * to be written here. If we do not write the data out, we
598                  * expose ourselves to the null files problem.
599                  *
600                  * Only flush from the on disk size to the smaller of the in
601                  * memory file size or the new size as that's the range we
602                  * really care about here and prevents waiting for other data
603                  * not within the range we care about here.
604                  */
605                 if (!code &&
606                     (ip->i_size != ip->i_d.di_size) &&
607                     (vap->va_size > ip->i_d.di_size)) {
608                         code = bhv_vop_flush_pages(XFS_ITOV(ip),
609                                         ip->i_d.di_size, vap->va_size,
610                                         XFS_B_ASYNC, FI_NONE);
611                 }
612
613                 /* wait for all I/O to complete */
614                 vn_iowait(vp);
615
616                 if (!code)
617                         code = xfs_itruncate_data(ip, vap->va_size);
618                 if (code) {
619                         ASSERT(tp == NULL);
620                         lock_flags &= ~XFS_ILOCK_EXCL;
621                         ASSERT(lock_flags == XFS_IOLOCK_EXCL);
622                         goto error_return;
623                 }
624                 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
625                 if ((code = xfs_trans_reserve(tp, 0,
626                                              XFS_ITRUNCATE_LOG_RES(mp), 0,
627                                              XFS_TRANS_PERM_LOG_RES,
628                                              XFS_ITRUNCATE_LOG_COUNT))) {
629                         xfs_trans_cancel(tp, 0);
630                         if (need_iolock)
631                                 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
632                         return code;
633                 }
634                 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
635                 xfs_ilock(ip, XFS_ILOCK_EXCL);
636         }
637
638         if (tp) {
639                 xfs_trans_ijoin(tp, ip, lock_flags);
640                 xfs_trans_ihold(tp, ip);
641         }
642
643         /* determine whether mandatory locking mode changes */
644         mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
645
646         /*
647          * Truncate file.  Must have write permission and not be a directory.
648          */
649         if (mask & XFS_AT_SIZE) {
650                 if (vap->va_size > ip->i_size) {
651                         xfs_igrow_finish(tp, ip, vap->va_size,
652                             !(flags & ATTR_DMI));
653                 } else if ((vap->va_size <= ip->i_size) ||
654                            ((vap->va_size == 0) && ip->i_d.di_nextents)) {
655                         /*
656                          * signal a sync transaction unless
657                          * we're truncating an already unlinked
658                          * file on a wsync filesystem
659                          */
660                         code = xfs_itruncate_finish(&tp, ip,
661                                             (xfs_fsize_t)vap->va_size,
662                                             XFS_DATA_FORK,
663                                             ((ip->i_d.di_nlink != 0 ||
664                                               !(mp->m_flags & XFS_MOUNT_WSYNC))
665                                              ? 1 : 0));
666                         if (code)
667                                 goto abort_return;
668                         /*
669                          * Truncated "down", so we're removing references
670                          * to old data here - if we now delay flushing for
671                          * a long time, we expose ourselves unduly to the
672                          * notorious NULL files problem.  So, we mark this
673                          * vnode and flush it when the file is closed, and
674                          * do not wait the usual (long) time for writeout.
675                          */
676                         VTRUNCATE(vp);
677                 }
678                 /*
679                  * Have to do this even if the file's size doesn't change.
680                  */
681                 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
682         }
683
684         /*
685          * Change file access modes.
686          */
687         if (mask & XFS_AT_MODE) {
688                 ip->i_d.di_mode &= S_IFMT;
689                 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
690
691                 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
692                 timeflags |= XFS_ICHGTIME_CHG;
693         }
694
695         /*
696          * Change file ownership.  Must be the owner or privileged.
697          * If the system was configured with the "restricted_chown"
698          * option, the owner is not permitted to give away the file,
699          * and can change the group id only to a group of which he
700          * or she is a member.
701          */
702         if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
703                 /*
704                  * CAP_FSETID overrides the following restrictions:
705                  *
706                  * The set-user-ID and set-group-ID bits of a file will be
707                  * cleared upon successful return from chown()
708                  */
709                 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
710                     !capable(CAP_FSETID)) {
711                         ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
712                 }
713
714                 /*
715                  * Change the ownerships and register quota modifications
716                  * in the transaction.
717                  */
718                 if (iuid != uid) {
719                         if (XFS_IS_UQUOTA_ON(mp)) {
720                                 ASSERT(mask & XFS_AT_UID);
721                                 ASSERT(udqp);
722                                 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
723                                                         &ip->i_udquot, udqp);
724                         }
725                         ip->i_d.di_uid = uid;
726                 }
727                 if (igid != gid) {
728                         if (XFS_IS_GQUOTA_ON(mp)) {
729                                 ASSERT(!XFS_IS_PQUOTA_ON(mp));
730                                 ASSERT(mask & XFS_AT_GID);
731                                 ASSERT(gdqp);
732                                 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
733                                                         &ip->i_gdquot, gdqp);
734                         }
735                         ip->i_d.di_gid = gid;
736                 }
737                 if (iprojid != projid) {
738                         if (XFS_IS_PQUOTA_ON(mp)) {
739                                 ASSERT(!XFS_IS_GQUOTA_ON(mp));
740                                 ASSERT(mask & XFS_AT_PROJID);
741                                 ASSERT(gdqp);
742                                 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
743                                                         &ip->i_gdquot, gdqp);
744                         }
745                         ip->i_d.di_projid = projid;
746                         /*
747                          * We may have to rev the inode as well as
748                          * the superblock version number since projids didn't
749                          * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
750                          */
751                         if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
752                                 xfs_bump_ino_vers2(tp, ip);
753                 }
754
755                 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
756                 timeflags |= XFS_ICHGTIME_CHG;
757         }
758
759
760         /*
761          * Change file access or modified times.
762          */
763         if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
764                 if (mask & XFS_AT_ATIME) {
765                         ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
766                         ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
767                         ip->i_update_core = 1;
768                         timeflags &= ~XFS_ICHGTIME_ACC;
769                 }
770                 if (mask & XFS_AT_MTIME) {
771                         ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
772                         ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
773                         timeflags &= ~XFS_ICHGTIME_MOD;
774                         timeflags |= XFS_ICHGTIME_CHG;
775                 }
776                 if (tp && (flags & ATTR_UTIME))
777                         xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
778         }
779
780         /*
781          * Change XFS-added attributes.
782          */
783         if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
784                 if (mask & XFS_AT_EXTSIZE) {
785                         /*
786                          * Converting bytes to fs blocks.
787                          */
788                         ip->i_d.di_extsize = vap->va_extsize >>
789                                 mp->m_sb.sb_blocklog;
790                 }
791                 if (mask & XFS_AT_XFLAGS) {
792                         uint    di_flags;
793
794                         /* can't set PREALLOC this way, just preserve it */
795                         di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
796                         if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
797                                 di_flags |= XFS_DIFLAG_IMMUTABLE;
798                         if (vap->va_xflags & XFS_XFLAG_APPEND)
799                                 di_flags |= XFS_DIFLAG_APPEND;
800                         if (vap->va_xflags & XFS_XFLAG_SYNC)
801                                 di_flags |= XFS_DIFLAG_SYNC;
802                         if (vap->va_xflags & XFS_XFLAG_NOATIME)
803                                 di_flags |= XFS_DIFLAG_NOATIME;
804                         if (vap->va_xflags & XFS_XFLAG_NODUMP)
805                                 di_flags |= XFS_DIFLAG_NODUMP;
806                         if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
807                                 di_flags |= XFS_DIFLAG_PROJINHERIT;
808                         if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
809                                 di_flags |= XFS_DIFLAG_NODEFRAG;
810                         if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
811                                 di_flags |= XFS_DIFLAG_FILESTREAM;
812                         if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
813                                 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
814                                         di_flags |= XFS_DIFLAG_RTINHERIT;
815                                 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
816                                         di_flags |= XFS_DIFLAG_NOSYMLINKS;
817                                 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
818                                         di_flags |= XFS_DIFLAG_EXTSZINHERIT;
819                         } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
820                                 if (vap->va_xflags & XFS_XFLAG_REALTIME) {
821                                         di_flags |= XFS_DIFLAG_REALTIME;
822                                         ip->i_iocore.io_flags |= XFS_IOCORE_RT;
823                                 } else {
824                                         ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
825                                 }
826                                 if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
827                                         di_flags |= XFS_DIFLAG_EXTSIZE;
828                         }
829                         ip->i_d.di_flags = di_flags;
830                 }
831                 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
832                 timeflags |= XFS_ICHGTIME_CHG;
833         }
834
835         /*
836          * Change file inode change time only if XFS_AT_CTIME set
837          * AND we have been called by a DMI function.
838          */
839
840         if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
841                 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
842                 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
843                 ip->i_update_core = 1;
844                 timeflags &= ~XFS_ICHGTIME_CHG;
845         }
846
847         /*
848          * Send out timestamp changes that need to be set to the
849          * current time.  Not done when called by a DMI function.
850          */
851         if (timeflags && !(flags & ATTR_DMI))
852                 xfs_ichgtime(ip, timeflags);
853
854         XFS_STATS_INC(xs_ig_attrchg);
855
856         /*
857          * If this is a synchronous mount, make sure that the
858          * transaction goes to disk before returning to the user.
859          * This is slightly sub-optimal in that truncates require
860          * two sync transactions instead of one for wsync filesystems.
861          * One for the truncate and one for the timestamps since we
862          * don't want to change the timestamps unless we're sure the
863          * truncate worked.  Truncates are less than 1% of the laddis
864          * mix so this probably isn't worth the trouble to optimize.
865          */
866         code = 0;
867         if (tp) {
868                 if (mp->m_flags & XFS_MOUNT_WSYNC)
869                         xfs_trans_set_sync(tp);
870
871                 code = xfs_trans_commit(tp, commit_flags);
872         }
873
874         /*
875          * If the (regular) file's mandatory locking mode changed, then
876          * notify the vnode.  We do this under the inode lock to prevent
877          * racing calls to vop_vnode_change.
878          */
879         mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
880         if (mandlock_before != mandlock_after) {
881                 bhv_vop_vnode_change(vp, VCHANGE_FLAGS_ENF_LOCKING,
882                                  mandlock_after);
883         }
884
885         xfs_iunlock(ip, lock_flags);
886
887         /*
888          * Release any dquot(s) the inode had kept before chown.
889          */
890         XFS_QM_DQRELE(mp, olddquot1);
891         XFS_QM_DQRELE(mp, olddquot2);
892         XFS_QM_DQRELE(mp, udqp);
893         XFS_QM_DQRELE(mp, gdqp);
894
895         if (code) {
896                 return code;
897         }
898
899         if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
900             !(flags & ATTR_DMI)) {
901                 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
902                                         NULL, DM_RIGHT_NULL, NULL, NULL,
903                                         0, 0, AT_DELAY_FLAG(flags));
904         }
905         return 0;
906
907  abort_return:
908         commit_flags |= XFS_TRANS_ABORT;
909         /* FALLTHROUGH */
910  error_return:
911         XFS_QM_DQRELE(mp, udqp);
912         XFS_QM_DQRELE(mp, gdqp);
913         if (tp) {
914                 xfs_trans_cancel(tp, commit_flags);
915         }
916         if (lock_flags != 0) {
917                 xfs_iunlock(ip, lock_flags);
918         }
919         return code;
920 }
921
922
923 /*
924  * xfs_access
925  * Null conversion from vnode mode bits to inode mode bits, as in efs.
926  */
927 STATIC int
928 xfs_access(
929         bhv_desc_t      *bdp,
930         int             mode,
931         cred_t          *credp)
932 {
933         xfs_inode_t     *ip;
934         int             error;
935
936         vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
937                                                (inst_t *)__return_address);
938
939         ip = XFS_BHVTOI(bdp);
940         xfs_ilock(ip, XFS_ILOCK_SHARED);
941         error = xfs_iaccess(ip, mode, credp);
942         xfs_iunlock(ip, XFS_ILOCK_SHARED);
943         return error;
944 }
945
946
947 /*
948  * The maximum pathlen is 1024 bytes. Since the minimum file system
949  * blocksize is 512 bytes, we can get a max of 2 extents back from
950  * bmapi.
951  */
952 #define SYMLINK_MAPS 2
953
/*
 * xfs_readlink
 *
 * Copy the symlink target into the caller-supplied uio.  The link is
 * either stored inline in the inode's data fork (XFS_IFINLINE) or
 * out-of-line in at most SYMLINK_MAPS extents, which are read
 * synchronously from the data device.
 *
 * Returns 0 on success or a positive XFS_ERROR() errno.  ioflags and
 * credp are not used in this function.
 */
STATIC int
xfs_readlink(
	bhv_desc_t	*bdp,
	uio_t		*uiop,
	int		ioflags,
	cred_t		*credp)
{
	xfs_inode_t	*ip;
	int		count;
	xfs_off_t	offset;
	int		pathlen;	/* bytes of link data still to copy */
	bhv_vnode_t	*vp;
	int		error = 0;
	xfs_mount_t	*mp;
	int		nmaps;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	xfs_daddr_t	d;
	int		byte_cnt;
	int		n;
	xfs_buf_t	*bp;

	vp = BHV_TO_VNODE(bdp);
	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);
	mp = ip->i_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Shared lock: we only read the data fork and inode size. */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);

	offset = uiop->uio_offset;
	count = uiop->uio_resid;

	/* A negative offset is invalid; a zero/negative count is a no-op. */
	if (offset < 0) {
		error = XFS_ERROR(EINVAL);
		goto error_return;
	}
	if (count <= 0) {
		error = 0;
		goto error_return;
	}

	/*
	 * See if the symlink is stored inline.
	 */
	pathlen = (int)ip->i_d.di_size;

	if (ip->i_df.if_flags & XFS_IFINLINE) {
		error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
	}
	else {
		/*
		 * Symlink not inline.  Call bmap to get it in.
		 */
		nmaps = SYMLINK_MAPS;

		error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
				  0, NULL, 0, mval, &nmaps, NULL, NULL);

		if (error) {
			goto error_return;
		}

		for (n = 0; n < nmaps; n++) {
			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			/* Synchronous read of this extent's blocks. */
			bp = xfs_buf_read(mp->m_ddev_targp, d,
				      BTOBB(byte_cnt), 0);
			error = XFS_BUF_GETERROR(bp);
			if (error) {
				xfs_ioerror_alert("xfs_readlink",
					  ip->i_mount, bp, XFS_BUF_ADDR(bp));
				xfs_buf_relse(bp);
				goto error_return;
			}
			/*
			 * The last extent may be only partially filled
			 * with link data; never copy past di_size.
			 */
			if (pathlen < byte_cnt)
				byte_cnt = pathlen;
			pathlen -= byte_cnt;

			error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
			xfs_buf_relse (bp);
		}

	}

error_return:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}
1051
1052
/*
 * xfs_fsync
 *
 * This is called to sync the inode and its data out to disk.
 * We need to hold the I/O lock while flushing the data, and
 * the inode lock while flushing the inode.  The inode lock CANNOT
 * be held while flushing the data, so acquire after we're done
 * with that.
 *
 * flag may carry FSYNC_DATA (only inode size/data state matters) and
 * FSYNC_WAIT (force a synchronous commit / log force).  start and stop
 * presumably delimit the byte range being synced (stop == -1 meaning
 * to EOF) — only the ASSERT below constrains them here; confirm against
 * callers.  Returns 0 or a positive errno.
 */
STATIC int
xfs_fsync(
	bhv_desc_t	*bdp,
	int		flag,
	cred_t		*credp,
	xfs_off_t	start,
	xfs_off_t	stop)
{
	xfs_inode_t	*ip;
	xfs_trans_t	*tp;
	int		error;
	int		log_flushed = 0, changed = 1;

	vn_trace_entry(BHV_TO_VNODE(bdp),
			__FUNCTION__, (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);

	ASSERT(start >= 0 && stop >= -1);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return XFS_ERROR(EIO);

	/*
	 * We always need to make sure that the required inode state
	 * is safe on disk.  The vnode might be clean but because
	 * of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional
	 * changes to the inode core that have to go to disk.
	 *
	 * The following code depends on one assumption:  that
	 * any transaction that changes an inode logs the core
	 * because it has to change some field in the inode core
	 * (typically nextents or nblocks).  That assumption
	 * implies that any transactions against an inode will
	 * catch any non-transactional updates.  If inode-altering
	 * transactions exist that violate this assumption, the
	 * code breaks.  Right now, it figures that if the involved
	 * update_* field is clear and the inode is unpinned, the
	 * inode is clean.  Either it's been flushed or it's been
	 * committed and the commit has hit the disk unpinning the inode.
	 * (Note that xfs_inode_item_format() called at commit clears
	 * the update_* fields.)
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/* If we are flushing data then we care about update_size
	 * being set, otherwise we care about update_core
	 */
	if ((flag & FSYNC_DATA) ?
			(ip->i_update_size == 0) :
			(ip->i_update_core == 0)) {
		/*
		 * Timestamps/size haven't changed since last inode
		 * flush or inode transaction commit.  That means
		 * either nothing got written or a transaction
		 * committed which caught the updates.  If the
		 * latter happened and the transaction hasn't
		 * hit the disk yet, the inode will be still
		 * be pinned.  If it is, force the log.
		 */

		xfs_iunlock(ip, XFS_ILOCK_SHARED);

		if (xfs_ipincount(ip)) {
			_xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
				      XFS_LOG_FORCE |
				      ((flag & FSYNC_WAIT)
				       ? XFS_LOG_SYNC : 0),
				      &log_flushed);
		} else {
			/*
			 * If the inode is not pinned and nothing
			 * has changed we don't need to flush the
			 * cache.
			 */
			changed = 0;
		}
		error = 0;
	} else  {
		/*
		 * Kick off a transaction to log the inode
		 * core to get the updates.  Make it
		 * sync if FSYNC_WAIT is passed in (which
		 * is done by everybody but specfs).  The
		 * sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
		if ((error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(ip->i_mount),
				0, 0, 0)))  {
			xfs_trans_cancel(tp, 0);
			return error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed
		 * ourselves out of the way during trans_reserve
		 * which would flush the inode.  But there's no
		 * guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).  Plus the buffer
		 * could be pinned anyway if it's part of an
		 * inode in another recent transaction.  So we
		 * play it safe and fire off the transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		if (flag & FSYNC_WAIT)
			xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
		/*
		 * If the log write didn't issue an ordered tag we need
		 * to flush the disk cache for the data device now.
		 */
		if (!log_flushed)
			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);

		/*
		 * If this inode is on the RT dev we need to flush that
		 * cache as well.
		 */
		if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
	}

	return error;
}
1197
1198 /*
1199  * This is called by xfs_inactive to free any blocks beyond eof
1200  * when the link count isn't zero and by xfs_dm_punch_hole() when
1201  * punching a hole to EOF.
1202  */
1203 int
1204 xfs_free_eofblocks(
1205         xfs_mount_t     *mp,
1206         xfs_inode_t     *ip,
1207         int             flags)
1208 {
1209         xfs_trans_t     *tp;
1210         int             error;
1211         xfs_fileoff_t   end_fsb;
1212         xfs_fileoff_t   last_fsb;
1213         xfs_filblks_t   map_len;
1214         int             nimaps;
1215         xfs_bmbt_irec_t imap;
1216         int             use_iolock = (flags & XFS_FREE_EOF_LOCK);
1217
1218         /*
1219          * Figure out if there are any blocks beyond the end
1220          * of the file.  If not, then there is nothing to do.
1221          */
1222         end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
1223         last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1224         map_len = last_fsb - end_fsb;
1225         if (map_len <= 0)
1226                 return 0;
1227
1228         nimaps = 1;
1229         xfs_ilock(ip, XFS_ILOCK_SHARED);
1230         error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0,
1231                           NULL, 0, &imap, &nimaps, NULL, NULL);
1232         xfs_iunlock(ip, XFS_ILOCK_SHARED);
1233
1234         if (!error && (nimaps != 0) &&
1235             (imap.br_startblock != HOLESTARTBLOCK ||
1236              ip->i_delayed_blks)) {
1237                 /*
1238                  * Attach the dquots to the inode up front.
1239                  */
1240                 if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
1241                         return error;
1242
1243                 /*
1244                  * There are blocks after the end of file.
1245                  * Free them up now by truncating the file to
1246                  * its current size.
1247                  */
1248                 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1249
1250                 /*
1251                  * Do the xfs_itruncate_start() call before
1252                  * reserving any log space because
1253                  * itruncate_start will call into the buffer
1254                  * cache and we can't
1255                  * do that within a transaction.
1256                  */
1257                 if (use_iolock)
1258                         xfs_ilock(ip, XFS_IOLOCK_EXCL);
1259                 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
1260                                     ip->i_size);
1261                 if (error) {
1262                         xfs_trans_cancel(tp, 0);
1263                         if (use_iolock)
1264                                 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1265                         return error;
1266                 }
1267
1268                 error = xfs_trans_reserve(tp, 0,
1269                                           XFS_ITRUNCATE_LOG_RES(mp),
1270                                           0, XFS_TRANS_PERM_LOG_RES,
1271                                           XFS_ITRUNCATE_LOG_COUNT);
1272                 if (error) {
1273                         ASSERT(XFS_FORCED_SHUTDOWN(mp));
1274                         xfs_trans_cancel(tp, 0);
1275                         xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1276                         return error;
1277                 }
1278
1279                 xfs_ilock(ip, XFS_ILOCK_EXCL);
1280                 xfs_trans_ijoin(tp, ip,
1281                                 XFS_IOLOCK_EXCL |
1282                                 XFS_ILOCK_EXCL);
1283                 xfs_trans_ihold(tp, ip);
1284
1285                 error = xfs_itruncate_finish(&tp, ip,
1286                                              ip->i_size,
1287                                              XFS_DATA_FORK,
1288                                              0);
1289                 /*
1290                  * If we get an error at this point we
1291                  * simply don't bother truncating the file.
1292                  */
1293                 if (error) {
1294                         xfs_trans_cancel(tp,
1295                                          (XFS_TRANS_RELEASE_LOG_RES |
1296                                           XFS_TRANS_ABORT));
1297                 } else {
1298                         error = xfs_trans_commit(tp,
1299                                                 XFS_TRANS_RELEASE_LOG_RES);
1300                 }
1301                 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)
1302                                             : XFS_ILOCK_EXCL));
1303         }
1304         return error;
1305 }
1306
/*
 * Free a symlink that has blocks associated with it.
 *
 * Called with *tpp pointing at a fresh, unreserved transaction.  On
 * success the inode is left locked (ILOCK + IOLOCK exclusive) and *tpp
 * is replaced with a new transaction carrying an itruncate log
 * reservation, NOT yet joined to the inode.  On failure the
 * transaction is cancelled, the inode unlocked, and *tpp set to NULL.
 *
 * The work spans two transactions: the first logs the zeroed inode
 * size plus the EFI for the unmapped blocks; the second (created via
 * xfs_trans_dup before the first commits) carries the extent frees
 * and EFDs and is what gets handed back to the caller.
 */
STATIC int
xfs_inactive_symlink_rmt(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_buf_t	*bp;
	int		committed;	/* xfs_bmap_finish committed first tp */
	int		done;		/* bunmapi removed everything */
	int		error;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	int		i;
	xfs_mount_t	*mp;
	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
	int		nmaps;
	xfs_trans_t	*ntp;		/* duplicate tp returned to caller */
	int		size;		/* link size before we zero di_size */
	xfs_trans_t	*tp;

	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
	/*
	 * We're freeing a symlink that has some
	 * blocks allocated to it.  Free the
	 * blocks here.  We know that we've got
	 * either 1 or 2 extents and that we can
	 * free them all in one bunmapi call.
	 */
	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		return error;
	}
	/*
	 * Lock the inode, fix the size, and join it to the transaction.
	 * Hold it so in the normal path, we still have it locked for
	 * the second transaction.  In the error paths we need it
	 * held so the cancel won't rele it, see below.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	size = (int)ip->i_d.di_size;
	ip->i_d.di_size = 0;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Find the block(s) so we can inval and unmap them.
	 */
	done = 0;
	XFS_BMAP_INIT(&free_list, &first_block);
	nmaps = ARRAY_SIZE(mval);
	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
			&free_list, NULL)))
		goto error0;
	/*
	 * Invalidate the block(s).
	 */
	for (i = 0; i < nmaps; i++) {
		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Unmap the dead block(s) to the free_list.
	 */
	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			&first_block, &free_list, NULL, &done)))
		goto error1;
	ASSERT(done);
	/*
	 * Commit the first transaction.  This logs the EFI and the inode.
	 */
	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
		goto error1;
	/*
	 * The transaction must have been committed, since there were
	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
	 * The new tp has the extent freeing and EFDs.
	 */
	ASSERT(committed);
	/*
	 * The first xact was committed, so add the inode to the new one.
	 * Mark it dirty so it will be logged and moved forward in the log as
	 * part of every commit.
	 */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	/*
	 * Get a new, empty transaction to return to our caller.
	 */
	ntp = xfs_trans_dup(tp);
	/*
	 * Commit the transaction containing extent freeing and EFDs.
	 * If we get an error on the commit here or on the reserve below,
	 * we need to unlock the inode since the new transaction doesn't
	 * have the inode attached.
	 */
	error = xfs_trans_commit(tp, 0);
	tp = ntp;
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Remove the memory for extent descriptions (just bookkeeping).
	 */
	if (ip->i_df.if_bytes)
		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
	ASSERT(ip->i_df.if_bytes == 0);
	/*
	 * Put an itruncate log reservation in the new transaction
	 * for our caller.
	 */
	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		goto error0;
	}
	/*
	 * Return with the inode locked but not joined to the transaction.
	 */
	*tpp = tp;
	return 0;

 error1:
	xfs_bmap_cancel(&free_list);
 error0:
	/*
	 * Have to come here with the inode locked and either
	 * (held and in the transaction) or (not in the transaction).
	 * If the inode isn't held then cancel would iput it, but
	 * that's wrong since this is inactive and the vnode ref
	 * count is 0 already.
	 * Cancel won't do anything to the inode if held, but it still
	 * needs to be locked until the cancel is done, if it was
	 * joined to the transaction.
	 */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	*tpp = NULL;
	return error;

}
1460
1461 STATIC int
1462 xfs_inactive_symlink_local(
1463         xfs_inode_t     *ip,
1464         xfs_trans_t     **tpp)
1465 {
1466         int             error;
1467
1468         ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
1469         /*
1470          * We're freeing a symlink which fit into
1471          * the inode.  Just free the memory used
1472          * to hold the old symlink.
1473          */
1474         error = xfs_trans_reserve(*tpp, 0,
1475                                   XFS_ITRUNCATE_LOG_RES(ip->i_mount),
1476                                   0, XFS_TRANS_PERM_LOG_RES,
1477                                   XFS_ITRUNCATE_LOG_COUNT);
1478
1479         if (error) {
1480                 xfs_trans_cancel(*tpp, 0);
1481                 *tpp = NULL;
1482                 return error;
1483         }
1484         xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1485
1486         /*
1487          * Zero length symlinks _can_ exist.
1488          */
1489         if (ip->i_df.if_bytes > 0) {
1490                 xfs_idata_realloc(ip,
1491                                   -(ip->i_df.if_bytes),
1492                                   XFS_DATA_FORK);
1493                 ASSERT(ip->i_df.if_bytes == 0);
1494         }
1495         return 0;
1496 }
1497
/*
 * Tear down the attribute fork as part of inode inactivation.
 *
 * Called with the iolock held (asserted below) and the ilock held by
 * the transaction in *tpp.  Commits that transaction, drops the ilock,
 * removes the extended attributes via xfs_attr_inactive(), then hands
 * back a fresh transaction in *tpp (with an ifree log reservation)
 * with the inode re-locked and joined.  On error *tpp is NULL and the
 * iolock has been dropped.
 */
STATIC int
xfs_inactive_attrs(
	xfs_inode_t	*ip,
	xfs_trans_t	**tpp)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_mount_t	*mp;

	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
	tp = *tpp;
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_forkoff != 0);
	/*
	 * Commit the caller's transaction and drop the ilock:
	 * xfs_attr_inactive() must run outside a transaction and
	 * without the ilock held.
	 */
	xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	error = xfs_attr_inactive(ip);
	if (error) {
		*tpp = NULL;
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		return error; /* goto out */
	}

	/* Start the replacement transaction for freeing the inode. */
	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_IFREE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_INACTIVE_LOG_COUNT);
	if (error) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_trans_cancel(tp, 0);
		*tpp = NULL;
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	/* Attributes are gone; release the in-core attr fork. */
	xfs_idestroy_fork(ip, XFS_ATTR_FORK);

	ASSERT(ip->i_d.di_anextents == 0);

	*tpp = tp;
	return 0;
}
1544
/*
 * xfs_release -- called on the last close of a file descriptor for a
 * regular file.  Best-effort housekeeping only: may flush dirty pages
 * after a truncate (to narrow the NULL-files crash window) and free
 * speculative preallocation beyond EOF.  Always returns 0 unless
 * xfs_free_eofblocks() fails.
 */
STATIC int
xfs_release(
	bhv_desc_t	*bdp)
{
	xfs_inode_t	*ip;
	bhv_vnode_t	*vp;
	xfs_mount_t	*mp;
	int		error;

	vp = BHV_TO_VNODE(bdp);
	ip = XFS_BHVTOI(bdp);
	mp = ip->i_mount;

	/* Nothing to do for non-regular files or already-freed inodes. */
	if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
		return 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
		return 0;

	if (!XFS_FORCED_SHUTDOWN(mp)) {
		/*
		 * If we are using filestreams, and we have an unlinked
		 * file that we are processing the last close on, then nothing
		 * will be able to reopen and write to this file. Purge this
		 * inode from the filestreams cache so that it doesn't delay
		 * teardown of the inode.
		 */
		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
			xfs_filestream_deassociate(ip);

		/*
		 * If we previously truncated this file and removed old data
		 * in the process, we want to initiate "early" writeout on
		 * the last close.  This is an attempt to combat the notorious
		 * NULL files problem which is particularly noticeable from a
		 * truncate down, buffered (re-)write (delalloc), followed by
		 * a crash.  What we are effectively doing here is
		 * significantly reducing the time window where we'd otherwise
		 * be exposed to that problem.
		 */
		if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
			bhv_vop_flush_pages(vp, 0, -1, XFS_B_ASYNC, FI_NONE);
	}

#ifdef HAVE_REFCACHE
	/* If we are in the NFS reference cache then don't do this now */
	if (ip->i_refcache)
		return 0;
#endif

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Trim blocks speculatively allocated past EOF, but only
		 * for a linked regular file with cached/delalloc data or
		 * nonzero size, with its extent list read in, and not
		 * marked for preallocation or append-only use.
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
		    (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return error;
			/* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
		}
	}

	return 0;
}
1614
1615 /*
1616  * xfs_inactive
1617  *
1618  * This is called when the vnode reference count for the vnode
1619  * goes to zero.  If the file has been unlinked, then it must
1620  * now be truncated.  Also, we clear all of the read-ahead state
1621  * kept for the inode here since the file is now closed.
1622  */
/*
 * Called when the vnode reference count goes to zero.  If the file has
 * been unlinked (nlink == 0) it is truncated and its inode is freed
 * here; otherwise only speculative EOF preallocation may be trimmed.
 * Always returns VN_INACTIVE_CACHE.
 */
STATIC int
xfs_inactive(
	bhv_desc_t	*bdp,
	cred_t		*credp)
{
	xfs_inode_t	*ip;
	bhv_vnode_t	*vp;
	xfs_bmap_free_t	free_list;	/* extents to free after xfs_ifree */
	xfs_fsblock_t	first_block;
	int		committed;
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	int		error;
	int		truncate;	/* nonzero: data fork must be truncated */

	vp = BHV_TO_VNODE(bdp);
	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);

	/*
	 * If the inode is already free, then there can be nothing
	 * to clean up here.
	 */
	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
		ASSERT(ip->i_df.if_real_bytes == 0);
		ASSERT(ip->i_df.if_broot_bytes == 0);
		return VN_INACTIVE_CACHE;
	}

	/*
	 * Only do a truncate if it's a regular file with
	 * some actual space in it.  It's OK to look at the
	 * inode's fields without the lock because we're the
	 * only one with a reference to the inode.
	 */
	truncate = ((ip->i_d.di_nlink == 0) &&
	    ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
	     (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));

	mp = ip->i_mount;

	/* Notify DMAPI of the impending destruction (best effort). */
	if (ip->i_d.di_nlink == 0 &&
	    DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) {
		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
	}

	error = 0;

	/* If this is a read-only mount, don't do this (would generate I/O) */
	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
		goto out;

	if (ip->i_d.di_nlink != 0) {
		/*
		 * Still linked: at most trim blocks beyond EOF, same as
		 * xfs_release(), then we are done.
		 */
		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
		       ip->i_delayed_blks > 0)) &&
		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
		     (!(ip->i_d.di_flags &
				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
		      (ip->i_delayed_blks != 0)))) {
			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
			if (error)
				return VN_INACTIVE_CACHE;
			/* Update linux inode block count after free above */
			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
				ip->i_d.di_nblocks + ip->i_delayed_blks);
		}
		goto out;
	}

	ASSERT(ip->i_d.di_nlink == 0);

	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
		return VN_INACTIVE_CACHE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
	if (truncate) {
		/*
		 * Do the xfs_itruncate_start() call before
		 * reserving any log space because itruncate_start
		 * will call into the buffer cache and we can't
		 * do that within a transaction.
		 */
		xfs_ilock(ip, XFS_IOLOCK_EXCL);

		error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		error = xfs_trans_reserve(tp, 0,
					  XFS_ITRUNCATE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_ITRUNCATE_LOG_COUNT);
		if (error) {
			/* Don't call itruncate_cleanup */
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);

		/*
		 * normally, we have to run xfs_itruncate_finish sync.
		 * But if filesystem is wsync and we're in the inactive
		 * path, then we know that nlink == 0, and that the
		 * xaction that made nlink == 0 is permanently committed
		 * since xfs_remove runs as a synchronous transaction.
		 */
		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));

		if (error) {
			xfs_trans_cancel(tp,
				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
			return VN_INACTIVE_CACHE;
		}
	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {

		/*
		 * If we get an error while cleaning up a
		 * symlink we bail out.  Remote (out-of-inode) targets
		 * and local (in-inode) targets take different paths;
		 * both leave *tp reserved with the inode locked.
		 */
		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
			xfs_inactive_symlink_rmt(ip, &tp) :
			xfs_inactive_symlink_local(ip, &tp);

		if (error) {
			ASSERT(tp == NULL);
			return VN_INACTIVE_CACHE;
		}

		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	} else {
		/* No data to truncate: just reserve for the inode free. */
		error = xfs_trans_reserve(tp, 0,
					  XFS_IFREE_LOG_RES(mp),
					  0, XFS_TRANS_PERM_LOG_RES,
					  XFS_INACTIVE_LOG_COUNT);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			return VN_INACTIVE_CACHE;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
	}

	/*
	 * If there are attributes associated with the file
	 * then blow them away now.  The code calls a routine
	 * that recursively deconstructs the attribute fork.
	 * We need to just commit the current transaction
	 * because we can't use it for xfs_attr_inactive().
	 */
	if (ip->i_d.di_anextents > 0) {
		error = xfs_inactive_attrs(ip, &tp);
		/*
		 * If we got an error, the transaction is already
		 * cancelled, and the inode is unlocked. Just get out.
		 */
		 if (error)
			 return VN_INACTIVE_CACHE;
	} else if (ip->i_afp) {
		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
	}

	/*
	 * Free the inode.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);
	error = xfs_ifree(tp, ip, &free_list);
	if (error) {
		/*
		 * If we fail to free the inode, shut down.  The cancel
		 * might do that, we need to make sure.  Otherwise the
		 * inode might be lost for a long time or forever.
		 */
		if (!XFS_FORCED_SHUTDOWN(mp)) {
			cmn_err(CE_NOTE,
		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
				error, mp->m_fsname);
			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		}
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
	} else {
		/*
		 * Credit the quota account(s). The inode is gone.
		 */
		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);

		/*
		 * Just ignore errors at this point.  There is
		 * nothing we can do except to try to keep going.
		 */
		(void) xfs_bmap_finish(&tp,  &free_list, &committed);
		(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	}
	/*
	 * Release the dquots held by inode, if any.
	 */
	XFS_QM_DQDETACH(mp, ip);

	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

 out:
	return VN_INACTIVE_CACHE;
}
1842
1843
1844 /*
1845  * xfs_lookup
1846  */
1847 STATIC int
1848 xfs_lookup(
1849         bhv_desc_t              *dir_bdp,
1850         bhv_vname_t             *dentry,
1851         bhv_vnode_t             **vpp,
1852         int                     flags,
1853         bhv_vnode_t             *rdir,
1854         cred_t                  *credp)
1855 {
1856         xfs_inode_t             *dp, *ip;
1857         xfs_ino_t               e_inum;
1858         int                     error;
1859         uint                    lock_mode;
1860         bhv_vnode_t             *dir_vp;
1861
1862         dir_vp = BHV_TO_VNODE(dir_bdp);
1863         vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
1864
1865         dp = XFS_BHVTOI(dir_bdp);
1866
1867         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
1868                 return XFS_ERROR(EIO);
1869
1870         lock_mode = xfs_ilock_map_shared(dp);
1871         error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip);
1872         if (!error) {
1873                 *vpp = XFS_ITOV(ip);
1874                 ITRACE(ip);
1875         }
1876         xfs_iunlock_map_shared(dp, lock_mode);
1877         return error;
1878 }
1879
1880
/*
 * xfs_create -- create a new file in directory dir_bdp with the
 * attributes in vap and return its vnode through *vpp.
 *
 * Sends DMAPI CREATE/POSTCREATE events when enabled, allocates on-disk
 * dquots, reserves a create transaction (falling back to a no-block
 * reservation on ENOSPC), allocates the inode, and enters it into the
 * directory.  All failure paths converge on std_return via the
 * abort_return / error_return / abort_rele goto-cleanup labels below.
 */
STATIC int
xfs_create(
	bhv_desc_t		*dir_bdp,
	bhv_vname_t		*dentry,
	bhv_vattr_t		*vap,
	bhv_vnode_t		**vpp,
	cred_t			*credp)
{
	char			*name = VNAME(dentry);
	bhv_vnode_t		*dir_vp;
	xfs_inode_t		*dp, *ip;
	bhv_vnode_t		*vp = NULL;
	xfs_trans_t		*tp;
	xfs_mount_t		*mp;
	xfs_dev_t		rdev;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		dp_joined_to_trans;	/* dir joined: trans owns unlock */
	int			dm_event_sent = 0;
	uint			cancel_flags;
	int			committed;
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;
	int			dm_di_mode;
	int			namelen;

	ASSERT(!*vpp);
	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	dm_di_mode = vap->va_mode;
	namelen = VNAMELEN(dentry);

	/* DMAPI pre-create event; the application may veto the create. */
	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
				dir_vp, DM_RIGHT_NULL, NULL,
				DM_RIGHT_NULL, name, NULL,
				dm_di_mode, 0, 0);

		if (error)
			return error;
		dm_event_sent = 1;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* Return through std_return after this point. */

	udqp = gdqp = NULL;
	/* Project id: inherit from dir, take from caller, or default. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else if (vap->va_mask & XFS_AT_PROJID)
		prid = (xfs_prid_t)vap->va_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	ip = NULL;
	dp_joined_to_trans = B_FALSE;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_CREATE_SPACE_RES(mp, namelen);
	/*
	 * Initially assume that the file does not exist and
	 * reserve the resources for that case.  If that is not
	 * the case we'll drop the one we have and get a more
	 * appropriate transaction later.
	 */
	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	if (error == ENOSPC) {
		/* Retry with no block reservation; may still succeed. */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		dp = NULL;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);

	XFS_BMAP_INIT(&free_list, &first_block);

	ASSERT(ip == NULL);

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/* With no block reservation, verify the entry can be added first. */
	if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen)))
		goto error_return;
	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
			rdev, credp, prid, resblks > 0,
			&ip, &committed);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	ITRACE(ip);

	/*
	 * At this point, we've gotten a newly allocated inode.
	 * It is locked (and joined to the transaction).
	 */

	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));

	/*
	 * Now we join the directory inode to the transaction.
	 * We do not do it earlier because xfs_dir_ialloc
	 * might commit the previous transaction (and release
	 * all the locks).
	 */

	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	dp_joined_to_trans = B_TRUE;

	error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino,
					&first_block, &free_list, resblks ?
					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto abort_return;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * create transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	dp->i_gen++;

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode. Since we want to return the
	 * vnode to the caller, we bump the vnode ref count now.
	 */
	IHOLD(ip);
	vp = XFS_ITOV(ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_rele;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		IRELE(ip);
		tp = NULL;
		goto error_return;
	}

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	/*
	 * Propagate the fact that the vnode changed after the
	 * xfs_inode locks have been released.
	 */
	bhv_vop_vnode_change(vp, VCHANGE_FLAGS_TRUNCATED, 3);

	*vpp = vp;

	/* Fallthrough to std_return with error = 0  */

std_return:
	/* DMAPI post-create event (also sent on failure after CREATE). */
	if ( (*vpp || (error != 0 && dm_event_sent != 0)) &&
			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
							DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
			dir_vp, DM_RIGHT_NULL,
			*vpp ? vp:NULL,
			DM_RIGHT_NULL, name, NULL,
			dm_di_mode, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	if (tp != NULL)
		xfs_trans_cancel(tp, cancel_flags);

	/* Unlock dp ourselves only if the transaction didn't own it. */
	if (!dp_joined_to_trans && (dp != NULL))
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;

 abort_rele:
	/*
	 * Wait until after the current transaction is aborted to
	 * release the inode.  This prevents recursive transactions
	 * and deadlocks from xfs_inactive.
	 */
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);
	IRELE(ip);

	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	goto std_return;
}
2128
#ifdef DEBUG
/*
 * Some counters to see if (and how often) we are hitting some deadlock
 * prevention code paths in xfs_lock_dir_and_entry().
 */

int xfs_rm_locks;	/* total calls into xfs_lock_dir_and_entry() */
int xfs_rm_lock_delays;	/* times we slept (delay(1)) before retrying */
int xfs_rm_attempts;	/* failed trylock attempts on the entry inode */
#endif
2139
2140 /*
2141  * The following routine will lock the inodes associated with the
2142  * directory and the named entry in the directory. The locks are
2143  * acquired in increasing inode number.
2144  *
2145  * If the entry is "..", then only the directory is locked. The
2146  * vnode ref count will still include that from the .. entry in
2147  * this case.
2148  *
2149  * There is a deadlock we need to worry about. If the locked directory is
2150  * in the AIL, it might be blocking up the log. The next inode we lock
 * could be already locked by another thread waiting for log space (e.g.
 * a permanent log reservation with a long running transaction (see
 * xfs_itruncate_finish)). To solve this, we must check if the directory
 * is in the AIL and use lock_nowait. If we can't lock, we need to
2155  * drop the inode lock on the directory and try again. xfs_iunlock will
2156  * potentially push the tail if we were holding up the log.
2157  */
/* See the block comment above for the locking protocol.  Always returns 0. */
STATIC int
xfs_lock_dir_and_entry(
	xfs_inode_t	*dp,
	xfs_inode_t	*ip)	/* inode of entry 'name' */
{
	int		attempts;	/* failed trylocks, drives backoff */
	xfs_ino_t	e_inum;
	xfs_inode_t	*ips[2];
	xfs_log_item_t	*lp;

#ifdef DEBUG
	xfs_rm_locks++;
#endif
	attempts = 0;

again:
	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);

	e_inum = ip->i_ino;

	ITRACE(ip);

	/*
	 * We want to lock in increasing inum. Since we've already
	 * acquired the lock on the directory, we may need to release
	 * it if the inum of the entry turns out to be less.
	 */
	if (e_inum > dp->i_ino) {
		/*
		 * We are already in the right order, so just
		 * lock on the inode of the entry.
		 * We need to use nowait if dp is in the AIL.
		 */

		lp = (xfs_log_item_t *)dp->i_itemp;
		if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
				attempts++;
#ifdef DEBUG
				xfs_rm_attempts++;
#endif

				/*
				 * Unlock dp and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				xfs_iunlock(dp, XFS_ILOCK_EXCL);

				/* Back off every 5th failure. */
				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_rm_lock_delays++;
#endif
				}
				goto again;
			}
		} else {
			xfs_ilock(ip, XFS_ILOCK_EXCL);
		}
	} else if (e_inum < dp->i_ino) {
		/* Wrong order: drop dp, then take both in inum order. */
		xfs_iunlock(dp, XFS_ILOCK_EXCL);

		ips[0] = ip;
		ips[1] = dp;
		xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
	}
	/* else  e_inum == dp->i_ino */
	/*     This can happen if we're asked to lock /x/..
	 *     the entry is "..", which is also the parent directory.
	 */

	return 0;
}
2233
#ifdef DEBUG
/*
 * Retry statistics for xfs_lock_inodes() below.  xfs_lock_delays counts
 * the times we backed off and slept before retrying; the other counters
 * are presumably bucketed retry counts updated past this chunk -- see
 * the remainder of xfs_lock_inodes().
 */
int xfs_locked_n;
int xfs_small_retries;
int xfs_middle_retries;
int xfs_lots_retries;
int xfs_lock_delays;
#endif
2241
2242 /*
2243  * Bump the subclass so xfs_lock_inodes() acquires each lock with
2244  * a different value
2245  */
2246 static inline int
2247 xfs_lock_inumorder(int lock_mode, int subclass)
2248 {
2249         if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
2250                 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
2251         if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
2252                 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
2253
2254         return lock_mode;
2255 }
2256
2257 /*
2258  * The following routine will lock n inodes in exclusive mode.
2259  * We assume the caller calls us with the inodes in i_ino order.
2260  *
2261  * We need to detect deadlock where an inode that we lock
2262  * is in the AIL and we start waiting for another inode that is locked
2263  * by a thread in a long running transaction (such as truncate). This can
2264  * result in deadlock since the long running trans might need to wait
2265  * for the inode we just locked in order to push the tail and free space
2266  * in the log.
2267  */
/*
 * Lock the 'inodes' entries of ips[] with 'lock_mode', in array order.
 * Once any already-locked inode is found to be in the AIL, subsequent
 * locks are only trylocked; on failure everything is dropped and the
 * whole sequence restarts (see the deadlock discussion above).
 */
void
xfs_lock_inodes(
	xfs_inode_t	**ips,		/* inodes to lock, in locking order */
	int		inodes,		/* number of entries in ips[] */
	int		first_locked,	/* nonzero: caller already holds ips[0] */
	uint		lock_mode)	/* lock flags for xfs_ilock() */
{
	int		attempts = 0, i, j, try_lock;
	xfs_log_item_t	*lp;

	ASSERT(ips && (inodes >= 2)); /* we need at least two */

	if (first_locked) {
		/*
		 * ips[0] is already held, so start at index 1; the held
		 * inode may be in the AIL, so go straight to trylocking.
		 */
		try_lock = 1;
		i = 1;
	} else {
		try_lock = 0;
		i = 0;
	}

again:
	for (; i < inodes; i++) {
		ASSERT(ips[i]);

		if (i && (ips[i] == ips[i-1]))	/* Already locked */
			continue;

		/*
		 * If try_lock is not set yet, make sure all locked inodes
		 * are not in the AIL.
		 * If any are, set try_lock to be used later.
		 */

		if (!try_lock) {
			for (j = (i - 1); j >= 0 && !try_lock; j--) {
				lp = (xfs_log_item_t *)ips[j]->i_itemp;
				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
					try_lock++;
				}
			}
		}

		/*
		 * If any of the previous locks we have locked is in the AIL,
		 * we must TRY to get the second and subsequent locks. If
		 * we can't get any, we must release all we have
		 * and try again.
		 */

		if (try_lock) {
			/* try_lock must be 0 if i is 0. */
			/*
			 * try_lock means we have an inode locked
			 * that is in the AIL.
			 */
			ASSERT(i != 0);
			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
				attempts++;

				/*
				 * Unlock all previous guys and try again.
				 * xfs_iunlock will try to push the tail
				 * if the inode is in the AIL.
				 */

				for(j = i - 1; j >= 0; j--) {

					/*
					 * Check to see if we've already
					 * unlocked this one.
					 * Not the first one going back,
					 * and the inode ptr is the same.
					 */
					if ((j != (i - 1)) && ips[j] ==
								ips[j+1])
						continue;

					xfs_iunlock(ips[j], lock_mode);
				}

				/* Back off briefly every 5th failed pass. */
				if ((attempts % 5) == 0) {
					delay(1); /* Don't just spin the CPU */
#ifdef DEBUG
					xfs_lock_delays++;
#endif
				}
				i = 0;
				try_lock = 0;
				goto again;
			}
		} else {
			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
		}
	}

#ifdef DEBUG
	/* Retry statistics, debug builds only. */
	if (attempts) {
		if (attempts < 5) xfs_small_retries++;
		else if (attempts < 100) xfs_middle_retries++;
		else xfs_lots_retries++;
	} else {
		xfs_locked_n++;
	}
#endif
}
2373
#ifdef	DEBUG
/*
 * Debug aid for xfs_remove(): REMOVE_DEBUG_TRACE(__LINE__) records the
 * line number of the most recent error-exit site taken, so a failing
 * path can be identified afterwards.  Compiles away in non-DEBUG builds.
 */
#define REMOVE_DEBUG_TRACE(x)   {remove_which_error_return = (x);}
int remove_which_error_return = 0;
#else /* ! DEBUG */
#define REMOVE_DEBUG_TRACE(x)
#endif  /* ! DEBUG */
2380
2381
/*
 * xfs_remove
 *
 * Remove the entry named by 'dentry' from directory 'dir_bdp' and drop
 * the link count of the inode it names.  Sends DMAPI DM_EVENT_REMOVE /
 * DM_EVENT_POSTREMOVE events when enabled.  Returns 0 or an errno.
 */
STATIC int
xfs_remove(
	bhv_desc_t		*dir_bdp,	/* behavior of parent directory */
	bhv_vname_t		*dentry,	/* name (and length) to remove */
	cred_t			*credp)		/* credentials; not referenced here */
{
	bhv_vnode_t		*dir_vp;
	char			*name = VNAME(dentry);
	xfs_inode_t		*dp, *ip;	/* parent dir / victim inode */
	xfs_trans_t		*tp = NULL;
	xfs_mount_t		*mp;
	int			error = 0;
	xfs_bmap_free_t		free_list;	/* blocks freed by removename */
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			dm_di_mode = 0;	/* victim mode for DMAPI events */
	int			link_zero;	/* nonzero if nlink hit zero */
	uint			resblks;
	int			namelen;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	namelen = VNAMELEN(dentry);

	/*
	 * Sample the victim's mode for the DMAPI pre-event below; the
	 * reference is dropped again immediately.
	 */
	if (!xfs_get_dir_entry(dentry, &ip)) {
		dm_di_mode = ip->i_d.di_mode;
		IRELE(ip);
	}

	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
					name, NULL, dm_di_mode, 0, 0);
		if (error)
			return error;
	}

	/* From this point on, return through std_return */
	ip = NULL;

	/*
	 * We need to get a reference to ip before we get our log
	 * reservation. The reason for this is that we cannot call
	 * xfs_iget for an inode for which we do not have a reference
	 * once we've acquired a log reservation. This is because the
	 * inode we are trying to get might be in xfs_inactive going
	 * for a log reservation. Since we'll have to wait for the
	 * inactive code to complete before returning from xfs_iget,
	 * we need to make sure that we don't have log space reserved
	 * when we call xfs_iget.  Instead we get an unlocked reference
	 * to the inode before getting our log reservation.
	 */
	error = xfs_get_dir_entry(dentry, &ip);
	if (error) {
		REMOVE_DEBUG_TRACE(__LINE__);
		goto std_return;
	}

	dm_di_mode = ip->i_d.di_mode;

	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);

	ITRACE(ip);

	/* Attach quota structures before taking the log reservation. */
	error = XFS_QM_DQATTACH(mp, dp, 0);
	if (!error && dp != ip)
		error = XFS_QM_DQATTACH(mp, ip, 0);
	if (error) {
		REMOVE_DEBUG_TRACE(__LINE__);
		IRELE(ip);
		goto std_return;
	}

	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * We try to get the real space reservation first,
	 * allowing for directory btree deletion(s) implying
	 * possible bmap insert(s).  If we can't get the space
	 * reservation then we use 0 instead, and avoid the bmap
	 * btree insert(s) in the directory code by, if the bmap
	 * insert tries to happen, instead trimming the LAST
	 * block from the directory.
	 */
	resblks = XFS_REMOVE_SPACE_RES(mp);
	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
	}
	if (error) {
		ASSERT(error != ENOSPC);
		REMOVE_DEBUG_TRACE(__LINE__);
		/* no log reservation held yet, so cancel with flags 0 */
		xfs_trans_cancel(tp, 0);
		IRELE(ip);
		return error;
	}

	error = xfs_lock_dir_and_entry(dp, ip);
	if (error) {
		REMOVE_DEBUG_TRACE(__LINE__);
		xfs_trans_cancel(tp, cancel_flags);
		IRELE(ip);
		goto std_return;
	}

	/*
	 * At this point, we've gotten both the directory and the entry
	 * inodes locked.
	 */
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	if (dp != ip) {
		/*
		 * Increment vnode ref count only in this case since
		 * there's an extra vnode reference in the case where
		 * dp == ip.
		 */
		IHOLD(dp);
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	}

	/*
	 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
	 */
	XFS_BMAP_INIT(&free_list, &first_block);
	error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino,
					&first_block, &free_list, 0);
	if (error) {
		ASSERT(error != ENOENT);
		REMOVE_DEBUG_TRACE(__LINE__);
		goto error1;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/* Bump the parent's in-core generation: its contents changed. */
	dp->i_gen++;
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	error = xfs_droplink(tp, ip);
	if (error) {
		REMOVE_DEBUG_TRACE(__LINE__);
		goto error1;
	}

	/* Determine if this is the last link while
	 * we are in the transaction.
	 */
	link_zero = (ip)->i_d.di_nlink==0;

	/*
	 * Take an extra ref on the inode so that it doesn't
	 * go to xfs_inactive() from within the commit.
	 */
	IHOLD(ip);

	/*
	 * If this is a synchronous mount, make sure that the
	 * remove transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		REMOVE_DEBUG_TRACE(__LINE__);
		goto error_rele;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		IRELE(ip);
		goto std_return;
	}

	/*
	 * Before we drop our extra reference to the inode, purge it
	 * from the refcache if it is there.  By waiting until afterwards
	 * to do the IRELE, we ensure that we won't go inactive in the
	 * xfs_refcache_purge_ip routine (although that would be OK).
	 */
	xfs_refcache_purge_ip(ip);

	/*
	 * If we are using filestreams, kill the stream association.
	 * If the file is still open it may get a new one but that
	 * will get killed on last close in xfs_close() so we don't
	 * have to worry about that.
	 */
	if (link_zero && xfs_inode_is_filestream(ip))
		xfs_filestream_deassociate(ip);

	vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);

	/*
	 * Let interposed file systems know about removed links.
	 */
	bhv_vop_link_removed(XFS_ITOV(ip), dir_vp, link_zero);

	IRELE(ip);

/*	Fall through to std_return with error = 0 */
 std_return:
	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp,
						DM_EVENT_POSTREMOVE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
				dir_vp, DM_RIGHT_NULL,
				NULL, DM_RIGHT_NULL,
				name, NULL, dm_di_mode, error, 0);
	}
	return error;

 error1:
	/* Directory update failed: discard the free-block list and abort. */
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);
	goto std_return;

 error_rele:
	/*
	 * In this case make sure to not release the inode until after
	 * the current transaction is aborted.  Releasing it beforehand
	 * can cause us to go to xfs_inactive and start a recursive
	 * transaction which can easily deadlock with the current one.
	 */
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
	xfs_trans_cancel(tp, cancel_flags);

	/*
	 * Before we drop our extra reference to the inode, purge it
	 * from the refcache if it is there.  By waiting until afterwards
	 * to do the IRELE, we ensure that we won't go inactive in the
	 * xfs_refcache_purge_ip routine (although that would be OK).
	 */
	xfs_refcache_purge_ip(ip);

	IRELE(ip);

	goto std_return;
}
2637
2638
/*
 * xfs_link
 *
 * Create a new hard link named 'dentry' in directory 'target_dir_bdp'
 * pointing to the existing non-directory vnode 'src_vp', bumping its
 * link count.  Sends DMAPI DM_EVENT_LINK / DM_EVENT_POSTLINK events
 * when enabled.  Returns 0 or an errno.
 */
STATIC int
xfs_link(
	bhv_desc_t		*target_dir_bdp,	/* directory to link into */
	bhv_vnode_t		*src_vp,		/* existing vnode to link */
	bhv_vname_t		*dentry,		/* name of the new link */
	cred_t			*credp)			/* credentials; not referenced here */
{
	xfs_inode_t		*tdp, *sip;	/* target dir / source inodes */
	xfs_trans_t		*tp;
	xfs_mount_t		*mp;
	xfs_inode_t		*ips[2];	/* lock-ordering scratch array */
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	bhv_vnode_t		*target_dir_vp;
	int			resblks;
	char			*target_name = VNAME(dentry);
	int			target_namelen;

	target_dir_vp = BHV_TO_VNODE(target_dir_bdp);
	vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address);
	vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);

	target_namelen = VNAMELEN(dentry);
	ASSERT(!VN_ISDIR(src_vp));	/* hard links to directories are not made here */

	sip = xfs_vtoi(src_vp);
	tdp = XFS_BHVTOI(target_dir_bdp);
	mp = tdp->i_mount;
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
					target_dir_vp, DM_RIGHT_NULL,
					src_vp, DM_RIGHT_NULL,
					target_name, NULL, 0, 0, 0);
		if (error)
			return error;
	}

	/* Return through std_return after this point. */

	/* Attach quota structures before taking the log reservation. */
	error = XFS_QM_DQATTACH(mp, sip, 0);
	if (!error && sip != tdp)
		error = XFS_QM_DQATTACH(mp, tdp, 0);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * Reserve blocks for the directory insert; on ENOSPC retry with a
	 * zero block reservation (no-allocation insert).
	 */
	resblks = XFS_LINK_SPACE_RES(mp, target_namelen);
	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;	/* no log reservation held to release */
		goto error_return;
	}

	/* Order the two inodes by inode number before locking both. */
	if (sip->i_ino < tdp->i_ino) {
		ips[0] = sip;
		ips[1] = tdp;
	} else {
		ips[0] = tdp;
		ips[1] = sip;
	}

	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);

	/*
	 * Increment vnode ref counts since xfs_trans_commit &
	 * xfs_trans_cancel will both unlock the inodes and
	 * decrement the associated ref counts.
	 */
	VN_HOLD(src_vp);
	VN_HOLD(target_dir_vp);
	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);

	/*
	 * If the source has too many links, we can't make any more to it.
	 */
	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto error_return;
	}

	/*
	 * If we are using project inheritance, we only allow hard link
	 * creation in our tree when the project IDs are the same; else
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (tdp->i_d.di_projid != sip->i_d.di_projid))) {
		error = XFS_ERROR(EXDEV);
		goto error_return;
	}

	/* With no block reservation, verify in advance that the entry fits. */
	if (resblks == 0 &&
	    (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen)))
		goto error_return;

	XFS_BMAP_INIT(&free_list, &first_block);

	error = xfs_dir_createname(tp, tdp, target_name, target_namelen,
				   sip->i_ino, &first_block, &free_list,
				   resblks);
	if (error)
		goto abort_return;
	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	tdp->i_gen++;	/* in-core generation: directory contents changed */
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

	error = xfs_bumplink(tp, sip);
	if (error)
		goto abort_return;

	/*
	 * If this is a synchronous mount, make sure that the
	 * link transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_return;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto std_return;

	/* Fall through to std_return with error = 0. */
std_return:
	if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip,
						DM_EVENT_POSTLINK)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
				target_dir_vp, DM_RIGHT_NULL,
				src_vp, DM_RIGHT_NULL,
				target_name, NULL, 0, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */

 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	goto std_return;
}
2805
2806
/*
 * xfs_mkdir
 *
 * Create a new directory named 'dentry' under parent 'dir_bdp' with
 * attributes taken from 'vap'; on success *vpp holds the new directory's
 * vnode with a reference for the caller.  Sends DMAPI DM_EVENT_CREATE /
 * DM_EVENT_POSTCREATE events when enabled.  Returns 0 or an errno.
 */
STATIC int
xfs_mkdir(
	bhv_desc_t		*dir_bdp,	/* behavior of parent directory */
	bhv_vname_t		*dentry,	/* name of directory to create */
	bhv_vattr_t		*vap,		/* initial attributes (mode, projid) */
	bhv_vnode_t		**vpp,		/* out: vnode of created directory */
	cred_t			*credp)		/* credentials for ialloc/quota */
{
	char			*dir_name = VNAME(dentry);
	xfs_inode_t		*dp;
	xfs_inode_t		*cdp;	/* inode of created dir */
	bhv_vnode_t		*cvp;	/* vnode of created dir */
	xfs_trans_t		*tp;
	xfs_mount_t		*mp;
	int			cancel_flags;
	int			error;
	int			committed;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	bhv_vnode_t		*dir_vp;
	boolean_t		dp_joined_to_trans;	/* controls unlock in error path */
	boolean_t		created = B_FALSE;
	int			dm_event_sent = 0;	/* pre-event sent: post-event owed */
	xfs_prid_t		prid;
	struct xfs_dquot	*udqp, *gdqp;	/* user/group dquots for new inode */
	uint			resblks;
	int			dm_di_mode;
	int			dir_namelen;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	dp = XFS_BHVTOI(dir_bdp);
	mp = dp->i_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	dir_namelen = VNAMELEN(dentry);

	tp = NULL;
	dp_joined_to_trans = B_FALSE;
	dm_di_mode = vap->va_mode;

	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
					dir_vp, DM_RIGHT_NULL, NULL,
					DM_RIGHT_NULL, dir_name, NULL,
					dm_di_mode, 0, 0);
		if (error)
			return error;
		dm_event_sent = 1;
	}

	/* Return through std_return after this point. */

	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	mp = dp->i_mount;
	udqp = gdqp = NULL;
	/* Pick the project ID: inherit from parent, take from vap, or default. */
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = dp->i_d.di_projid;
	else if (vap->va_mask & XFS_AT_PROJID)
		prid = (xfs_prid_t)vap->va_projid;
	else
		prid = (xfs_prid_t)dfltprid;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = XFS_QM_DQVOPALLOC(mp, dp,
			current_fsuid(credp), current_fsgid(credp), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * Reserve blocks for the inode allocation plus the directory
	 * insert; on ENOSPC retry with a zero block reservation.
	 */
	resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen);
	error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
					  XFS_TRANS_PERM_LOG_RES,
					  XFS_MKDIR_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		dp = NULL;	/* dp is not locked yet; skip unlock in error path */
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);

	/*
	 * Check for directory link count overflow.
	 */
	if (dp->i_d.di_nlink >= XFS_MAXLINK) {
		error = XFS_ERROR(EMLINK);
		goto error_return;
	}

	/*
	 * Reserve disk quota and the inode.
	 */
	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/* With no block reservation, verify in advance that the entry fits. */
	if (resblks == 0 &&
	    (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen)))
		goto error_return;
	/*
	 * create the directory inode.
	 */
	error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
			0, credp, prid, resblks > 0,
		&cdp, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto abort_return;
	}
	ITRACE(cdp);

	/*
	 * Now we add the directory inode to the transaction.
	 * We waited until now since xfs_dir_ialloc might start
	 * a new transaction.  Had we joined the transaction
	 * earlier, the locks might have gotten released.
	 */
	VN_HOLD(dir_vp);
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	dp_joined_to_trans = B_TRUE;

	XFS_BMAP_INIT(&free_list, &first_block);

	/* Reserve for the insert minus what the ialloc already consumed. */
	error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino,
				   &first_block, &free_list, resblks ?
				   resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
	if (error) {
		ASSERT(error != ENOSPC);
		goto error1;
	}
	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/*
	 * Bump the in memory version number of the parent directory
	 * so that other processes accessing it will recognize that
	 * the directory has changed.
	 */
	dp->i_gen++;

	/* Initialize the new directory's contents (xfs_dir_init). */
	error = xfs_dir_init(tp, cdp, dp);
	if (error)
		goto error2;

	cdp->i_gen = 1;
	error = xfs_bumplink(tp, dp);	/* parent link count goes up for the new subdir */
	if (error)
		goto error2;

	cvp = XFS_ITOV(cdp);

	created = B_TRUE;

	*vpp = cvp;
	IHOLD(cdp);	/* reference handed to the caller via *vpp */

	/*
	 * Attach the dquots to the new inode and modify the icount incore.
	 */
	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);

	/*
	 * If this is a synchronous mount, make sure that the
	 * mkdir transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		IRELE(cdp);
		goto error2;
	}

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);
	if (error) {
		IRELE(cdp);
	}

	/* Fall through to std_return with error = 0 or errno from
	 * xfs_trans_commit. */

std_return:
	if ( (created || (error != 0 && dm_event_sent != 0)) &&
			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
						DM_EVENT_POSTCREATE)) {
		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
					dir_vp, DM_RIGHT_NULL,
					created ? XFS_ITOV(cdp):NULL,
					DM_RIGHT_NULL,
					dir_name, NULL,
					dm_di_mode, error, 0);
	}
	return error;

 error2:
 error1:
	xfs_bmap_cancel(&free_list);
 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	XFS_QM_DQRELE(mp, udqp);
	XFS_QM_DQRELE(mp, gdqp);

	/* Only unlock dp if we locked it and have not joined it to tp. */
	if (!dp_joined_to_trans && (dp != NULL)) {
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
	}

	goto std_return;
}
3038
3039
3040 /*
3041  * xfs_rmdir
3042  *
3043  */
3044 STATIC int
3045 xfs_rmdir(
3046         bhv_desc_t              *dir_bdp,
3047         bhv_vname_t             *dentry,
3048         cred_t                  *credp)
3049 {
3050         char                    *name = VNAME(dentry);
3051         xfs_inode_t             *dp;
3052         xfs_inode_t             *cdp;   /* child directory */
3053         xfs_trans_t             *tp;
3054         xfs_mount_t             *mp;
3055         int                     error;
3056         xfs_bmap_free_t         free_list;
3057         xfs_fsblock_t           first_block;
3058         int                     cancel_flags;
3059         int                     committed;
3060         bhv_vnode_t             *dir_vp;
3061         int                     dm_di_mode = S_IFDIR;
3062         int                     last_cdp_link;
3063         int                     namelen;
3064         uint                    resblks;
3065
3066         dir_vp = BHV_TO_VNODE(dir_bdp);
3067         dp = XFS_BHVTOI(dir_bdp);
3068         mp = dp->i_mount;
3069
3070         vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
3071
3072         if (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount))
3073                 return XFS_ERROR(EIO);
3074         namelen = VNAMELEN(dentry);
3075
3076         if (!xfs_get_dir_entry(dentry, &cdp)) {
3077                 dm_di_mode = cdp->i_d.di_mode;
3078                 IRELE(cdp);
3079         }