fs/open.c

   1 /*
   2  *  linux/fs/open.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/string.h>
   8 #include <linux/mm.h>
   9 #include <linux/utime.h>
  10 #include <linux/file.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/quotaops.h>
  13 #include <linux/fsnotify.h>
  14 #include <linux/module.h>
  15 #include <linux/slab.h>
  16 #include <linux/tty.h>
  17 #include <linux/namei.h>
  18 #include <linux/backing-dev.h>
  19 #include <linux/security.h>
  20 #include <linux/mount.h>
  21 #include <linux/vfs.h>
  22 #include <asm/uaccess.h>
  23 #include <linux/fs.h>
  24 #include <linux/personality.h>
  25 #include <linux/pagemap.h>
  26 #include <linux/syscalls.h>
  27
  28 #include <asm/unistd.h>
  29
  30 int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
  31 {
  32         int retval = -ENODEV;
  33
  34         if (sb) {
  35                 retval = -ENOSYS;
  36                 if (sb->s_op->statfs) {
  37                         memset(buf, 0, sizeof(*buf));
  38                         retval = security_sb_statfs(sb);
  39                         if (retval)
  40                                 return retval;
  41                         retval = sb->s_op->statfs(sb, buf);
  42                         if (retval == 0 && buf->f_frsize == 0)
  43                                 buf->f_frsize = buf->f_bsize;
  44                 }
  45         }
  46         return retval;
  47 }
  48
  49 EXPORT_SYMBOL(vfs_statfs);
  50
  51 static int vfs_statfs_native(struct super_block *sb, struct statfs *buf)
  52 {
  53         struct kstatfs st;
  54         int retval;
  55
  56         retval = vfs_statfs(sb, &st);
  57         if (retval)
  58                 return retval;
  59
  60         if (sizeof(*buf) == sizeof(st))
  61                 memcpy(buf, &st, sizeof(st));
  62         else {
  63                 if (sizeof buf->f_blocks == 4) {
  64                         if ((st.f_blocks | st.f_bfree | st.f_bavail) &
  65                             0xffffffff00000000ULL)
  66                                 return -EOVERFLOW;
  67                         /*
  68                          * f_files and f_ffree may be -1; it's okay to stuff
  69                          * that into 32 bits
  70                          */
  71                         if (st.f_files != -1 &&
  72                             (st.f_files & 0xffffffff00000000ULL))
  73                                 return -EOVERFLOW;
  74                         if (st.f_ffree != -1 &&
  75                             (st.f_ffree & 0xffffffff00000000ULL))
  76                                 return -EOVERFLOW;
  77                 }
  78
  79                 buf->f_type = st.f_type;
  80                 buf->f_bsize = st.f_bsize;
  81                 buf->f_blocks = st.f_blocks;
  82                 buf->f_bfree = st.f_bfree;
  83                 buf->f_bavail = st.f_bavail;
  84                 buf->f_files = st.f_files;
  85                 buf->f_ffree = st.f_ffree;
  86                 buf->f_fsid = st.f_fsid;
  87                 buf->f_namelen = st.f_namelen;
  88                 buf->f_frsize = st.f_frsize;
  89                 memset(buf->f_spare, 0, sizeof(buf->f_spare));
  90         }
  91         return 0;
  92 }
  93
  94 static int vfs_statfs64(struct super_block *sb, struct statfs64 *buf)
  95 {
  96         struct kstatfs st;
  97         int retval;
  98
  99         retval = vfs_statfs(sb, &st);
 100         if (retval)
 101                 return retval;
 102
 103         if (sizeof(*buf) == sizeof(st))
 104                 memcpy(buf, &st, sizeof(st));
 105         else {
 106                 buf->f_type = st.f_type;
 107                 buf->f_bsize = st.f_bsize;
 108                 buf->f_blocks = st.f_blocks;
 109                 buf->f_bfree = st.f_bfree;
 110                 buf->f_bavail = st.f_bavail;
 111                 buf->f_files = st.f_files;
 112                 buf->f_ffree = st.f_ffree;
 113                 buf->f_fsid = st.f_fsid;
 114                 buf->f_namelen = st.f_namelen;
 115                 buf->f_frsize = st.f_frsize;
 116                 memset(buf->f_spare, 0, sizeof(buf->f_spare));
 117         }
 118         return 0;
 119 }
 120
 121 asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
 122 {
 123         struct nameidata nd;
 124         int error;
 125
 126         error = user_path_walk(path, &nd);
 127         if (!error) {
 128                 struct statfs tmp;
 129                 error = vfs_statfs_native(nd.dentry->d_inode->i_sb, &tmp);
 130                 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 131                         error = -EFAULT;
 132                 path_release(&nd);
 133         }
 134         return error;
 135 }
 136
 137
 138 asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
 139 {
 140         struct nameidata nd;
 141         long error;
 142
 143         if (sz != sizeof(*buf))
 144                 return -EINVAL;
 145         error = user_path_walk(path, &nd);
 146         if (!error) {
 147                 struct statfs64 tmp;
 148                 error = vfs_statfs64(nd.dentry->d_inode->i_sb, &tmp);
 149                 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 150                         error = -EFAULT;
 151                 path_release(&nd);
 152         }
 153         return error;
 154 }
 155
 156
 157 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf)
 158 {
 159         struct file * file;
 160         struct statfs tmp;
 161         int error;
 162
 163         error = -EBADF;
 164         file = fget(fd);
 165         if (!file)
 166                 goto out;
 167         error = vfs_statfs_native(file->f_dentry->d_inode->i_sb, &tmp);
 168         if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 169                 error = -EFAULT;
 170         fput(file);
 171 out:
 172         return error;
 173 }
 174
 175 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf)
 176 {
 177         struct file * file;
 178         struct statfs64 tmp;
 179         int error;
 180
 181         if (sz != sizeof(*buf))
 182                 return -EINVAL;
 183
 184         error = -EBADF;
 185         file = fget(fd);
 186         if (!file)
 187                 goto out;
 188         error = vfs_statfs64(file->f_dentry->d_inode->i_sb, &tmp);
 189         if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 190                 error = -EFAULT;
 191         fput(file);
 192 out:
 193         return error;
 194 }
 195
 196 int do_truncate(struct dentry *dentry, loff_t length)
 197 {
 198         int err;
 199         struct iattr newattrs;
 200
 201         /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
 202         if (length < 0)
 203                 return -EINVAL;
 204
 205         newattrs.ia_size = length;
 206         newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 207
 208         down(&dentry->d_inode->i_sem);
 209         err = notify_change(dentry, &newattrs);
 210         up(&dentry->d_inode->i_sem);
 211         return err;
 212 }
 213
 214 static inline long do_sys_truncate(const char __user * path, loff_t length)
 215 {
 216         struct nameidata nd;
 217         struct inode * inode;
 218         int error;
 219
 220         error = -EINVAL;
 221         if (length < 0) /* sorry, but loff_t says... */
 222                 goto out;
 223
 224         error = user_path_walk(path, &nd);
 225         if (error)
 226                 goto out;
 227         inode = nd.dentry->d_inode;
 228
 229         /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 230         error = -EISDIR;
 231         if (S_ISDIR(inode->i_mode))
 232                 goto dput_and_out;
 233
 234         error = -EINVAL;
 235         if (!S_ISREG(inode->i_mode))
 236                 goto dput_and_out;
 237
 238         error = permission(inode,MAY_WRITE,&nd);
 239         if (error)
 240                 goto dput_and_out;
 241
 242         error = -EROFS;
 243         if (IS_RDONLY(inode))
 244                 goto dput_and_out;
 245
 246         error = -EPERM;
 247         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 248                 goto dput_and_out;
 249
 250         /*
 251          * Make sure that there are no leases.
 252          */
 253         error = break_lease(inode, FMODE_WRITE);
 254         if (error)
 255                 goto dput_and_out;
 256
 257         error = get_write_access(inode);
 258         if (error)
 259                 goto dput_and_out;
 260
 261         error = locks_verify_truncate(inode, NULL, length);
 262         if (!error) {
 263                 DQUOT_INIT(inode);
 264                 error = do_truncate(nd.dentry, length);
 265         }
 266         put_write_access(inode);
 267
 268 dput_and_out:
 269         path_release(&nd);
 270 out:
 271         return error;
 272 }
 273
 274 asmlinkage long sys_truncate(const char __user * path, unsigned long length)
 275 {
 276         /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */
 277         return do_sys_truncate(path, (long)length);
 278 }
 279
 280 static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 281 {
 282         struct inode * inode;
 283         struct dentry *dentry;
 284         struct file * file;
 285         int error;
 286
 287         error = -EINVAL;
 288         if (length < 0)
 289                 goto out;
 290         error = -EBADF;
 291         file = fget(fd);
 292         if (!file)
 293                 goto out;
 294
 295         /* explicitly opened as large or we are on 64-bit box */
 296         if (file->f_flags & O_LARGEFILE)
 297                 small = 0;
 298
 299         dentry = file->f_dentry;
 300         inode = dentry->d_inode;
 301         error = -EINVAL;
 302         if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
 303                 goto out_putf;
 304
 305         error = -EINVAL;
 306         /* Cannot ftruncate over 2^31 bytes without large file support */
 307         if (small && length > MAX_NON_LFS)
 308                 goto out_putf;
 309
 310         error = -EPERM;
 311         if (IS_APPEND(inode))
 312                 goto out_putf;
 313
 314         error = locks_verify_truncate(inode, file, length);
 315         if (!error)
 316                 error = do_truncate(dentry, length);
 317 out_putf:
 318         fput(file);
 319 out:
 320         return error;
 321 }
 322
 323 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
 324 {
 325         return do_sys_ftruncate(fd, length, 1);
 326 }
 327
 328 /* LFS versions of truncate are only needed on 32 bit machines */
 329 #if BITS_PER_LONG == 32
 330 asmlinkage long sys_truncate64(const char __user * path, loff_t length)
 331 {
 332         return do_sys_truncate(path, length);
 333 }
 334
 335 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
 336 {
 337         return do_sys_ftruncate(fd, length, 0);
 338 }
 339 #endif
 340
 341 #ifdef __ARCH_WANT_SYS_UTIME
 342
 343 /*
 344  * sys_utime() can be implemented in user-level using sys_utimes().
 345  * Is this for backwards compatibility?  If so, why not move it
 346  * into the appropriate arch directory (for those architectures that
 347  * need it).
 348  */
 349
 350 /* If times==NULL, set access and modification to current time,
 351  * must be owner or have write permission.
 352  * Else, update from *times, must be owner or super user.
 353  */
 354 asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times)
 355 {
 356         int error;
 357         struct nameidata nd;
 358         struct inode * inode;
 359         struct iattr newattrs;
 360
 361         error = user_path_walk(filename, &nd);
 362         if (error)
 363                 goto out;
 364         inode = nd.dentry->d_inode;
 365
 366         error = -EROFS;
 367         if (IS_RDONLY(inode))
 368                 goto dput_and_out;
 369
 370         /* Don't worry, the checks are done in inode_change_ok() */
 371         newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 372         if (times) {
 373                 error = -EPERM;
 374                 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 375                         goto dput_and_out;
 376
 377                 error = get_user(newattrs.ia_atime.tv_sec, &times->actime);
 378                 newattrs.ia_atime.tv_nsec = 0;
 379                 if (!error)
 380                         error = get_user(newattrs.ia_mtime.tv_sec, &times->modtime);
 381                 newattrs.ia_mtime.tv_nsec = 0;
 382                 if (error)
 383                         goto dput_and_out;
 384
 385                 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 386         } else {
 387                 error = -EACCES;
 388                 if (IS_IMMUTABLE(inode))
 389                         goto dput_and_out;
 390
 391                 if (current->fsuid != inode->i_uid &&
 392                     (error = permission(inode,MAY_WRITE,&nd)) != 0)
 393                         goto dput_and_out;
 394         }
 395         down(&inode->i_sem);
 396         error = notify_change(nd.dentry, &newattrs);
 397         up(&inode->i_sem);
 398 dput_and_out:
 399         path_release(&nd);
 400 out:
 401         return error;
 402 }
 403
 404 #endif
 405
 406 /* If times==NULL, set access and modification to current time,
 407  * must be owner or have write permission.
 408  * Else, update from *times, must be owner or super user.
 409  */
 410 long do_utimes(char __user * filename, struct timeval * times)
 411 {
 412         int error;
 413         struct nameidata nd;
 414         struct inode * inode;
 415         struct iattr newattrs;
 416
 417         error = user_path_walk(filename, &nd);
 418
 419         if (error)
 420                 goto out;
 421         inode = nd.dentry->d_inode;
 422
 423         error = -EROFS;
 424         if (IS_RDONLY(inode))
 425                 goto dput_and_out;
 426
 427         /* Don't worry, the checks are done in inode_change_ok() */
 428         newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 429         if (times) {
 430                 error = -EPERM;
 431                 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 432                         goto dput_and_out;
 433
 434                 newattrs.ia_atime.tv_sec = times[0].tv_sec;
 435                 newattrs.ia_atime.tv_nsec = times[0].tv_usec * 1000;
 436                 newattrs.ia_mtime.tv_sec = times[1].tv_sec;
 437                 newattrs.ia_mtime.tv_nsec = times[1].tv_usec * 1000;
 438                 newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
 439         } else {
 440                 error = -EACCES;
 441                 if (IS_IMMUTABLE(inode))
 442                         goto dput_and_out;
 443
 444                 if (current->fsuid != inode->i_uid &&
 445                     (error = permission(inode,MAY_WRITE,&nd)) != 0)
 446                         goto dput_and_out;
 447         }
 448         down(&inode->i_sem);
 449         error = notify_change(nd.dentry, &newattrs);
 450         up(&inode->i_sem);
 451 dput_and_out:
 452         path_release(&nd);
 453 out:
 454         return error;
 455 }
 456
 457 asmlinkage long sys_utimes(char __user * filename, struct timeval __user * utimes)
 458 {
 459         struct timeval times[2];
 460
 461         if (utimes && copy_from_user(&times, utimes, sizeof(times)))
 462                 return -EFAULT;
 463         return do_utimes(filename, utimes ? times : NULL);
 464 }
 465
 466
 467 /*
 468  * access() needs to use the real uid/gid, not the effective uid/gid.
 469  * We do this by temporarily clearing all FS-related capabilities and
 470  * switching the fsuid/fsgid around to the real ones.
 471  */
 472 asmlinkage long sys_access(const char __user * filename, int mode)
 473 {
 474         struct nameidata nd;
 475         int old_fsuid, old_fsgid;
 476         kernel_cap_t old_cap;
 477         int res;
 478
 479         if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
 480                 return -EINVAL;
 481
 482         old_fsuid = current->fsuid;
 483         old_fsgid = current->fsgid;
 484         old_cap = current->cap_effective;
 485
 486         current->fsuid = current->uid;
 487         current->fsgid = current->gid;
 488
 489         /*
 490          * Clear the capabilities if we switch to a non-root user
 491          *
 492          * FIXME: There is a race here against sys_capset.  The
 493          * capabilities can change yet we will restore the old
 494          * value below.  We should hold task_capabilities_lock,
 495          * but we cannot because user_path_walk can sleep.
 496          */
 497         if (current->uid)
 498                 cap_clear(current->cap_effective);
 499         else
 500                 current->cap_effective = current->cap_permitted;
 501
 502         res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
 503         if (!res) {
 504                 res = permission(nd.dentry->d_inode, mode, &nd);
 505                 /* SuS v2 requires we report a read only fs too */
 506                 if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
 507                    && !special_file(nd.dentry->d_inode->i_mode))
 508                         res = -EROFS;
 509                 path_release(&nd);
 510         }
 511
 512         current->fsuid = old_fsuid;
 513         current->fsgid = old_fsgid;
 514         current->cap_effective = old_cap;
 515
 516         return res;
 517 }
 518
 519 asmlinkage long sys_chdir(const char __user * filename)
 520 {
 521         struct nameidata nd;
 522         int error;
 523
 524         error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
 525         if (error)
 526                 goto out;
 527
 528         error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
 529         if (error)
 530                 goto dput_and_out;
 531
 532         set_fs_pwd(current->fs, nd.mnt, nd.dentry);
 533
 534 dput_and_out:
 535         path_release(&nd);
 536 out:
 537         return error;
 538 }
 539
 540 asmlinkage long sys_fchdir(unsigned int fd)
 541 {
 542         struct file *file;
 543         struct dentry *dentry;
 544         struct inode *inode;
 545         struct vfsmount *mnt;
 546         int error;
 547
 548         error = -EBADF;
 549         file = fget(fd);
 550         if (!file)
 551                 goto out;
 552
 553         dentry = file->f_dentry;
 554         mnt = file->f_vfsmnt;
 555         inode = dentry->d_inode;
 556
 557         error = -ENOTDIR;
 558         if (!S_ISDIR(inode->i_mode))
 559                 goto out_putf;
 560
 561         error = permission(inode, MAY_EXEC, NULL);
 562         if (!error)
 563                 set_fs_pwd(current->fs, mnt, dentry);
 564 out_putf:
 565         fput(file);
 566 out:
 567         return error;
 568 }
 569
 570 asmlinkage long sys_chroot(const char __user * filename)
 571 {
 572         struct nameidata nd;
 573         int error;
 574
 575         error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
 576         if (error)
 577                 goto out;
 578
 579         error = permission(nd.dentry->d_inode,MAY_EXEC,&nd);
 580         if (error)
 581                 goto dput_and_out;
 582
 583         error = -EPERM;
 584         if (!capable(CAP_SYS_CHROOT))
 585                 goto dput_and_out;
 586
 587         set_fs_root(current->fs, nd.mnt, nd.dentry);
 588         set_fs_altroot();
 589         error = 0;
 590 dput_and_out:
 591         path_release(&nd);
 592 out:
 593         return error;
 594 }
 595
 596 asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 597 {
 598         struct inode * inode;
 599         struct dentry * dentry;
 600         struct file * file;
 601         int err = -EBADF;
 602         struct iattr newattrs;
 603
 604         file = fget(fd);
 605         if (!file)
 606                 goto out;
 607
 608         dentry = file->f_dentry;
 609         inode = dentry->d_inode;
 610
 611         err = -EROFS;
 612         if (IS_RDONLY(inode))
 613                 goto out_putf;
 614         err = -EPERM;
 615         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 616                 goto out_putf;
 617         down(&inode->i_sem);
 618         if (mode == (mode_t) -1)
 619                 mode = inode->i_mode;
 620         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 621         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 622         err = notify_change(dentry, &newattrs);
 623         up(&inode->i_sem);
 624
 625 out_putf:
 626         fput(file);
 627 out:
 628         return err;
 629 }
 630
 631 asmlinkage long sys_chmod(const char __user * filename, mode_t mode)
 632 {
 633         struct nameidata nd;
 634         struct inode * inode;
 635         int error;
 636         struct iattr newattrs;
 637
 638         error = user_path_walk(filename, &nd);
 639         if (error)
 640                 goto out;
 641         inode = nd.dentry->d_inode;
 642
 643         error = -EROFS;
 644         if (IS_RDONLY(inode))
 645                 goto dput_and_out;
 646
 647         error = -EPERM;
 648         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 649                 goto dput_and_out;
 650
 651         down(&inode->i_sem);
 652         if (mode == (mode_t) -1)
 653                 mode = inode->i_mode;
 654         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 655         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 656         error = notify_change(nd.dentry, &newattrs);
 657         up(&inode->i_sem);
 658
 659 dput_and_out:
 660         path_release(&nd);
 661 out:
 662         return error;
 663 }
 664
 665 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 666 {
 667         struct inode * inode;
 668         int error;
 669         struct iattr newattrs;
 670
 671         error = -ENOENT;
 672         if (!(inode = dentry->d_inode)) {
 673                 printk(KERN_ERR "chown_common: NULL inode\n");
 674                 goto out;
 675         }
 676         error = -EROFS;
 677         if (IS_RDONLY(inode))
 678                 goto out;
 679         error = -EPERM;
 680         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 681                 goto out;
 682         newattrs.ia_valid =  ATTR_CTIME;
 683         if (user != (uid_t) -1) {
 684                 newattrs.ia_valid |= ATTR_UID;
 685                 newattrs.ia_uid = user;
 686         }
 687         if (group != (gid_t) -1) {
 688                 newattrs.ia_valid |= ATTR_GID;
 689                 newattrs.ia_gid = group;
 690         }
 691         if (!S_ISDIR(inode->i_mode))
 692                 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
 693         down(&inode->i_sem);
 694         error = notify_change(dentry, &newattrs);
 695         up(&inode->i_sem);
 696 out:
 697         return error;
 698 }
 699
 700 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
 701 {
 702         struct nameidata nd;
 703         int error;
 704
 705         error = user_path_walk(filename, &nd);
 706         if (!error) {
 707                 error = chown_common(nd.dentry, user, group);
 708                 path_release(&nd);
 709         }
 710         return error;
 711 }
 712
 713 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
 714 {
 715         struct nameidata nd;
 716         int error;
 717
 718         error = user_path_walk_link(filename, &nd);
 719         if (!error) {
 720                 error = chown_common(nd.dentry, user, group);
 721                 path_release(&nd);
 722         }
 723         return error;
 724 }
 725
 726
 727 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 728 {
 729         struct file * file;
 730         int error = -EBADF;
 731
 732         file = fget(fd);
 733         if (file) {
 734                 error = chown_common(file->f_dentry, user, group);
 735                 fput(file);
 736         }
 737         return error;
 738 }
 739
 740 /*
 741  * Note that while the flag value (low two bits) for sys_open means:
 742  *      00 - read-only
 743  *      01 - write-only
 744  *      10 - read-write
 745  *      11 - special
 746  * it is changed into
 747  *      00 - no permissions needed
 748  *      01 - read-permission
 749  *      10 - write-permission
 750  *      11 - read-write
 751  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 752  * used by symlinks.
 753  */
 754 struct file *filp_open(const char * filename, int flags, int mode)
 755 {
 756         int namei_flags, error;
 757         struct nameidata nd;
 758
 759         namei_flags = flags;
 760         if ((namei_flags+1) & O_ACCMODE)
 761                 namei_flags++;
 762         if (namei_flags & O_TRUNC)
 763                 namei_flags |= 2;
 764
 765         error = open_namei(filename, namei_flags, mode, &nd);
 766         if (!error)
 767                 return dentry_open(nd.dentry, nd.mnt, flags);
 768
 769         return ERR_PTR(error);
 770 }
 771
 772 EXPORT_SYMBOL(filp_open);
 773
 774 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 775 {
 776         struct file * f;
 777         struct inode *inode;
 778         int error;
 779
 780         error = -ENFILE;
 781         f = get_empty_filp();
 782         if (!f)
 783                 goto cleanup_dentry;
 784         f->f_flags = flags;
 785         f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 786         inode = dentry->d_inode;
 787         if (f->f_mode & FMODE_WRITE) {
 788                 error = get_write_access(inode);
 789                 if (error)
 790                         goto cleanup_file;
 791         }
 792
 793         f->f_mapping = inode->i_mapping;
 794         f->f_dentry = dentry;
 795         f->f_vfsmnt = mnt;
 796         f->f_pos = 0;
 797         f->f_op = fops_get(inode->i_fop);
 798         file_move(f, &inode->i_sb->s_files);
 799
 800         if (f->f_op && f->f_op->open) {
 801                 error = f->f_op->open(inode,f);
 802                 if (error)
 803                         goto cleanup_all;
 804         }
 805         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 806
 807         file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 808
 809         /* NB: we're sure to have correct a_ops only after f_op->open */
 810         if (f->f_flags & O_DIRECT) {
 811                 if (!f->f_mapping->a_ops ||
 812                     ((!f->f_mapping->a_ops->direct_IO) &&
 813                     (!f->f_mapping->a_ops->get_xip_page))) {
 814                         fput(f);
 815                         f = ERR_PTR(-EINVAL);
 816                 }
 817         }
 818
 819         return f;
 820
 821 cleanup_all:
 822         fops_put(f->f_op);
 823         if (f->f_mode & FMODE_WRITE)
 824                 put_write_access(inode);
 825         file_kill(f);
 826         f->f_dentry = NULL;
 827         f->f_vfsmnt = NULL;
 828 cleanup_file:
 829         put_filp(f);
 830 cleanup_dentry:
 831         dput(dentry);
 832         mntput(mnt);
 833         return ERR_PTR(error);
 834 }
 835
 836 EXPORT_SYMBOL(dentry_open);
 837
 838 /*
 839  * Find an empty file descriptor entry, and mark it busy.
 840  */
 841 int get_unused_fd(void)
 842 {
 843         struct files_struct * files = current->files;
 844         int fd, error;
 845
 846         error = -EMFILE;
 847         spin_lock(&files->file_lock);
 848
 849 repeat:
 850         fd = find_next_zero_bit(files->open_fds->fds_bits,
 851                                 files->max_fdset,
 852                                 files->next_fd);
 853
 854         /*
 855          * N.B. For clone tasks sharing a files structure, this test
 856          * will limit the total number of files that can be opened.
 857          */
 858         if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 859                 goto out;
 860
 861         /* Do we need to expand the fd array or fd set?  */
 862         error = expand_files(files, fd);
 863         if (error < 0)
 864                 goto out;
 865
 866         if (error) {
 867                 /*
 868                  * If we needed to expand the fs array we
 869                  * might have blocked - try again.
 870                  */
 871                 error = -EMFILE;
 872                 goto repeat;
 873         }
 874
 875         FD_SET(fd, files->open_fds);
 876         FD_CLR(fd, files->close_on_exec);
 877         files->next_fd = fd + 1;
 878 #if 1
 879         /* Sanity check */
 880         if (files->fd[fd] != NULL) {
 881                 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
 882                 files->fd[fd] = NULL;
 883         }
 884 #endif
 885         error = fd;
 886
 887 out:
 888         spin_unlock(&files->file_lock);
 889         return error;
 890 }
 891
 892 EXPORT_SYMBOL(get_unused_fd);
 893
 894 static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
 895 {
 896         __FD_CLR(fd, files->open_fds);
 897         if (fd < files->next_fd)
 898                 files->next_fd = fd;
 899 }
 900
 901 void fastcall put_unused_fd(unsigned int fd)
 902 {
 903         struct files_struct *files = current->files;
 904         spin_lock(&files->file_lock);
 905         __put_unused_fd(files, fd);
 906         spin_unlock(&files->file_lock);
 907 }
 908
 909 EXPORT_SYMBOL(put_unused_fd);
 910
 911 /*
 912  * Install a file pointer in the fd array.
 913  *
 914  * The VFS is full of places where we drop the files lock between
 915  * setting the open_fds bitmap and installing the file in the file
 916  * array.  At any such point, we are vulnerable to a dup2() race
 917  * installing a file in the array before us.  We need to detect this and
 918  * fput() the struct file we are about to overwrite in this case.
 919  *
 920  * It should never happen - if we allow dup2() do it, _really_ bad things
 921  * will follow.
 922  */
 923
 924 void fastcall fd_install(unsigned int fd, struct file * file)
 925 {
 926         struct files_struct *files = current->files;
 927         spin_lock(&files->file_lock);
 928         if (unlikely(files->fd[fd] != NULL))
 929                 BUG();
 930         files->fd[fd] = file;
 931         spin_unlock(&files->file_lock);
 932 }
 933
 934 EXPORT_SYMBOL(fd_install);
 935
 936 long do_sys_open(const char __user *filename, int flags, int mode)
 937 {
 938         char *tmp = getname(filename);
 939         int fd = PTR_ERR(tmp);
 940
 941         if (!IS_ERR(tmp)) {
 942                 fd = get_unused_fd();
 943                 if (fd >= 0) {
 944                         struct file *f = filp_open(tmp, flags, mode);
 945                         if (IS_ERR(f)) {
 946                                 put_unused_fd(fd);
 947                                 fd = PTR_ERR(f);
 948                         } else {
 949                                 fsnotify_open(f->f_dentry);
 950                                 fd_install(fd, f);
 951                         }
 952                 }
 953                 putname(tmp);
 954         }
 955         return fd;
 956 }
 957
 958 asmlinkage long sys_open(const char __user *filename, int flags, int mode)
 959 {
 960         if (force_o_largefile())
 961                 flags |= O_LARGEFILE;
 962
 963         return do_sys_open(filename, flags, mode);
 964 }
 965 EXPORT_SYMBOL_GPL(sys_open);
 966
 967 #ifndef __alpha__
 968
 969 /*
 970  * For backward compatibility?  Maybe this should be moved
 971  * into arch/i386 instead?
 972  */
 973 asmlinkage long sys_creat(const char __user * pathname, int mode)
 974 {
 975         return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
 976 }
 977
 978 #endif
 979
 980 /*
 981  * "id" is the POSIX thread ID. We use the
 982  * files pointer for this..
 983  */
 984 int filp_close(struct file *filp, fl_owner_t id)
 985 {
 986         int retval = 0;
 987
 988         if (!file_count(filp)) {
 989                 printk(KERN_ERR "VFS: Close: file count is 0\n");
 990                 return 0;
 991         }
 992
 993         if (filp->f_op && filp->f_op->flush)
 994                 retval = filp->f_op->flush(filp);
 995
 996         dnotify_flush(filp, id);
 997         locks_remove_posix(filp, id);
 998         fput(filp);
 999         return retval;
1000 }
1001
1002 EXPORT_SYMBOL(filp_close);
1003
1004 /*
1005  * Careful here! We test whether the file pointer is NULL before
1006  * releasing the fd. This ensures that one clone task can't release
1007  * an fd while another clone is opening it.
1008  */
1009 asmlinkage long sys_close(unsigned int fd)
1010 {
1011         struct file * filp;
1012         struct files_struct *files = current->files;
1013
1014         spin_lock(&files->file_lock);
1015         if (fd >= files->max_fds)
1016                 goto out_unlock;
1017         filp = files->fd[fd];
1018         if (!filp)
1019                 goto out_unlock;
1020         files->fd[fd] = NULL;
1021         FD_CLR(fd, files->close_on_exec);
1022         __put_unused_fd(files, fd);
1023         spin_unlock(&files->file_lock);
1024         return filp_close(filp, files);
1025
1026 out_unlock:
1027         spin_unlock(&files->file_lock);
1028         return -EBADF;
1029 }
1030
1031 EXPORT_SYMBOL(sys_close);
1032
1033 /*
1034  * This routine simulates a hangup on the tty, to arrange that users
1035  * are given clean terminals at login time.
1036  */
1037 asmlinkage long sys_vhangup(void)
1038 {
1039         if (capable(CAP_SYS_TTY_CONFIG)) {
1040                 tty_vhangup(current->signal->tty);
1041                 return 0;
1042         }
1043         return -EPERM;
1044 }
1045
1046 /*
1047  * Called when an inode is about to be open.
1048  * We use this to disallow opening large files on 32bit systems if
1049  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1050  * on this flag in sys_open.
1051  */
1052 int generic_file_open(struct inode * inode, struct file * filp)
1053 {
1054         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1055                 return -EFBIG;
1056         return 0;
1057 }
1058
1059 EXPORT_SYMBOL(generic_file_open);
1060
1061 /*
1062  * This is used by subsystems that don't want seekable
1063  * file descriptors
1064  */
1065 int nonseekable_open(struct inode *inode, struct file *filp)
1066 {
1067         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1068         return 0;
1069 }
1070
1071 EXPORT_SYMBOL(nonseekable_open);