4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
/*
 * Map POSIX open() access flags to NT/CIFS desired-access bits.
 * NOTE(review): this extract is missing lines (the O_RDONLY/O_WRONLY
 * return statements and the closing braces are not visible here).
 */
45 static inline int cifs_convert_flags(unsigned int flags)
47 if ((flags & O_ACCMODE) == O_RDONLY)
49 else if ((flags & O_ACCMODE) == O_WRONLY)
51 else if ((flags & O_ACCMODE) == O_RDWR) {
52 /* GENERIC_ALL is too much permission to request
53 can cause unnecessary access denied on create */
54 /* return GENERIC_ALL; */
/* read/write generic access is sufficient for O_RDWR */
55 return (GENERIC_READ | GENERIC_WRITE);
/* fallback: request explicit attribute/EA/data access bits */
58 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
59 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Translate POSIX open() flags into the SMB_O_* flags used by the CIFS
 * POSIX-extension open call.  NOTE(review): several conditional lines
 * (e.g. the O_CREAT/O_EXCL/O_TRUNC/O_DSYNC/O_DIRECT "if" tests and the
 * final return) are missing from this extract.
 */
63 static u32 cifs_posix_convert_flags(unsigned int flags)
67 if ((flags & O_ACCMODE) == O_RDONLY)
68 posix_flags = SMB_O_RDONLY;
69 else if ((flags & O_ACCMODE) == O_WRONLY)
70 posix_flags = SMB_O_WRONLY;
71 else if ((flags & O_ACCMODE) == O_RDWR)
72 posix_flags = SMB_O_RDWR;
75 posix_flags |= SMB_O_CREAT;
77 posix_flags |= SMB_O_EXCL;
79 posix_flags |= SMB_O_TRUNC;
80 /* be safe and imply O_SYNC for O_DSYNC */
82 posix_flags |= SMB_O_SYNC;
83 if (flags & O_DIRECTORY)
84 posix_flags |= SMB_O_DIRECTORY;
85 if (flags & O_NOFOLLOW)
86 posix_flags |= SMB_O_NOFOLLOW;
88 posix_flags |= SMB_O_DIRECT;
/*
 * Map POSIX create/truncate flags onto the CIFS create disposition.
 * See the mapping table in cifs_nt_open() below.  NOTE(review): the
 * FILE_CREATE / FILE_OPEN_IF return lines and the final FILE_OPEN
 * fallback are missing from this extract.
 */
93 static inline int cifs_get_disposition(unsigned int flags)
95 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
98 return FILE_OVERWRITE_IF;
99 else if ((flags & O_CREAT) == O_CREAT)
101 else if ((flags & O_TRUNC) == O_TRUNC)
102 return FILE_OVERWRITE;
/*
 * Open a file using the CIFS POSIX extensions (CIFSPOSIXCreate) and, on
 * success, populate/refresh the inode from the returned UNIX basic info.
 * *pinode may be NULL on entry, in which case a new inode is looked up.
 * NOTE(review): error-handling lines (rc declaration, ENOMEM/tlink error
 * returns, kfree of presp_data, posix_open_ret label) are missing from
 * this extract.
 */
107 int cifs_posix_open(char *full_path, struct inode **pinode,
108 struct super_block *sb, int mode, unsigned int f_flags,
109 __u32 *poplock, __u16 *pnetfid, int xid)
112 FILE_UNIX_BASIC_INFO *presp_data;
113 __u32 posix_flags = 0;
114 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
115 struct cifs_fattr fattr;
116 struct tcon_link *tlink;
117 struct cifs_tcon *tcon;
119 cFYI(1, "posix open %s", full_path);
121 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
122 if (presp_data == NULL)
125 tlink = cifs_sb_tlink(cifs_sb);
131 tcon = tlink_tcon(tlink);
/* apply the process umask before sending the mode to the server */
132 mode &= ~current_umask();
134 posix_flags = cifs_posix_convert_flags(f_flags);
135 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
136 poplock, full_path, cifs_sb->local_nls,
137 cifs_sb->mnt_cifs_flags &
138 CIFS_MOUNT_MAP_SPECIAL_CHR);
139 cifs_put_tlink(tlink);
/* Type == -1 means the server did not return file info */
144 if (presp_data->Type == cpu_to_le32(-1))
145 goto posix_open_ret; /* open ok, caller does qpathinfo */
148 goto posix_open_ret; /* caller does not need info */
150 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
152 /* get new inode and set it up */
153 if (*pinode == NULL) {
154 cifs_fill_uniqueid(sb, &fattr);
155 *pinode = cifs_iget(sb, &fattr);
/* existing inode: refresh its attributes in place */
161 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open a file the "NT way" (CIFSSMBOpen), falling back to the legacy
 * SMBOpen for pre-NT servers, then refresh inode info from the open
 * response.  NOTE(review): the return type line, rc/desiredAccess/buf
 * declarations, buf NULL check, and the kfree/return tail are missing
 * from this extract.
 */
170 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
171 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
172 __u16 *pnetfid, int xid)
177 int create_options = CREATE_NOT_DIR;
180 desiredAccess = cifs_convert_flags(f_flags);
182 /*********************************************************************
183 * open flag mapping table:
185 * POSIX Flag CIFS Disposition
186 * ---------- ----------------
187 * O_CREAT FILE_OPEN_IF
188 * O_CREAT | O_EXCL FILE_CREATE
189 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
190 * O_TRUNC FILE_OVERWRITE
191 * none of the above FILE_OPEN
193 * Note that there is not a direct match between disposition
194 * FILE_SUPERSEDE (ie create whether or not file exists although
195 * O_CREAT | O_TRUNC is similar but truncates the existing
196 * file rather than creating a new file as FILE_SUPERSEDE does
197 * (which uses the attributes / metadata passed in on open call)
199 *? O_SYNC is a reasonable match to CIFS writethrough flag
200 *? and the read write flags match reasonably. O_LARGEFILE
201 *? is irrelevant because largefile support is always used
202 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
203 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
204 *********************************************************************/
206 disposition = cifs_get_disposition(f_flags);
208 /* BB pass O_SYNC flag through on file attributes .. BB */
/* buffer for the FILE_ALL_INFO returned by the open call */
210 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
214 if (backup_cred(cifs_sb))
215 create_options |= CREATE_OPEN_BACKUP_INTENT;
/* NT-capable server: full CIFSSMBOpen; otherwise legacy SMBOpen */
217 if (tcon->ses->capabilities & CAP_NT_SMBS)
218 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
219 desiredAccess, create_options, pnetfid, poplock, buf,
220 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
221 & CIFS_MOUNT_MAP_SPECIAL_CHR);
223 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
224 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
225 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
226 & CIFS_MOUNT_MAP_SPECIAL_CHR);
/* refresh inode: unix path when unix extensions are active */
232 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
235 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialize a cifsFileInfo for a freshly opened handle,
 * link it onto the tcon and inode open-file lists, record the oplock
 * level, and stash it in file->private_data.  Starts with a reference
 * count of 1 (dropped by cifsFileInfo_put).  NOTE(review): the NULL
 * return on allocation failure and the final return of pCifsFile are
 * missing from this extract.
 */
243 struct cifsFileInfo *
244 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
245 struct tcon_link *tlink, __u32 oplock)
247 struct dentry *dentry = file->f_path.dentry;
248 struct inode *inode = dentry->d_inode;
249 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
250 struct cifsFileInfo *pCifsFile;
252 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253 if (pCifsFile == NULL)
256 pCifsFile->count = 1;
257 pCifsFile->netfid = fileHandle;
258 pCifsFile->pid = current->tgid;
259 pCifsFile->uid = current_fsuid();
/* hold a dentry reference for the lifetime of the open file */
260 pCifsFile->dentry = dget(dentry);
261 pCifsFile->f_flags = file->f_flags;
262 pCifsFile->invalidHandle = false;
263 pCifsFile->tlink = cifs_get_tlink(tlink);
264 mutex_init(&pCifsFile->fh_mutex);
265 mutex_init(&pCifsFile->lock_mutex);
266 INIT_LIST_HEAD(&pCifsFile->llist);
267 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
269 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
271 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
/* write-only instances go at the tail so readers are found first */
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
276 spin_unlock(&cifs_file_list_lock);
278 cifs_set_oplock_level(pCifsInode, oplock);
280 file->private_data = pCifsFile;
/*
285 * Release a reference on the file private data. This may involve closing
286 * the filehandle out on the server. Must be called without holding
287 * cifs_file_list_lock.
 *
 * NOTE(review): several lines are missing from this extract (the early
 * return when the refcount stays positive, xid acquire/release around
 * CIFSSMBClose, and the kfree of cifs_file at the end).
 */
289 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
291 struct inode *inode = cifs_file->dentry->d_inode;
292 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
293 struct cifsInodeInfo *cifsi = CIFS_I(inode);
294 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
295 struct cifsLockInfo *li, *tmp;
297 spin_lock(&cifs_file_list_lock);
/* not the last reference: just drop the count and return */
298 if (--cifs_file->count > 0) {
299 spin_unlock(&cifs_file_list_lock);
303 /* remove it from the lists */
304 list_del(&cifs_file->flist);
305 list_del(&cifs_file->tlist);
307 if (list_empty(&cifsi->openFileList)) {
308 cFYI(1, "closing last open instance for inode %p",
309 cifs_file->dentry->d_inode);
311 /* in strict cache mode we need invalidate mapping on the last
312 close because it may cause a error when we open this file
313 again and get at least level II oplock */
314 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
315 CIFS_I(inode)->invalid_mapping = true;
317 cifs_set_oplock_level(cifsi, 0);
319 spin_unlock(&cifs_file_list_lock);
/* wait for any in-flight oplock break work before closing */
321 cancel_work_sync(&cifs_file->oplock_break);
323 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
327 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
331 /* Delete any outstanding lock records. We'll lose them when the file
334 mutex_lock(&cifs_file->lock_mutex);
335 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
336 list_del(&li->llist);
339 mutex_unlock(&cifs_file->lock_mutex);
341 cifs_put_tlink(cifs_file->tlink);
342 dput(cifs_file->dentry);
/*
 * VFS ->open for regular files: try a POSIX-extension open first when
 * the server supports it, otherwise fall back to the NT open path, then
 * attach a cifsFileInfo to the struct file.  NOTE(review): rc/xid/
 * oplock/netfid declarations, several error-exit paths, and the final
 * kfree/FreeXid tail are missing from this extract.
 */
346 int cifs_open(struct inode *inode, struct file *file)
351 struct cifs_sb_info *cifs_sb;
352 struct cifs_tcon *tcon;
353 struct tcon_link *tlink;
354 struct cifsFileInfo *pCifsFile = NULL;
355 char *full_path = NULL;
356 bool posix_open_ok = false;
361 cifs_sb = CIFS_SB(inode->i_sb);
362 tlink = cifs_sb_tlink(cifs_sb);
365 return PTR_ERR(tlink);
367 tcon = tlink_tcon(tlink);
369 full_path = build_path_from_dentry(file->f_path.dentry);
370 if (full_path == NULL) {
375 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
376 inode, file->f_flags, full_path);
/* attempt POSIX open only when the server advertises the capability
   and a previous POSIX open has not already failed unexpectedly */
383 if (!tcon->broken_posix_open && tcon->unix_ext &&
384 (tcon->ses->capabilities & CAP_UNIX) &&
385 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
386 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
387 /* can not refresh inode info since size could be stale */
388 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
389 cifs_sb->mnt_file_mode /* ignored */,
390 file->f_flags, &oplock, &netfid, xid);
392 cFYI(1, "posix open succeeded");
393 posix_open_ok = true;
394 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
395 if (tcon->ses->serverNOS)
396 cERROR(1, "server %s of type %s returned"
397 " unexpected error on SMB posix open"
398 ", disabling posix open support."
399 " Check if server update available.",
400 tcon->ses->serverName,
401 tcon->ses->serverNOS);
402 tcon->broken_posix_open = true;
403 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
404 (rc != -EOPNOTSUPP)) /* path not found or net err */
406 /* else fallthrough to retry open the old way on network i/o
410 if (!posix_open_ok) {
411 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
412 file->f_flags, &oplock, &netfid, xid);
417 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
418 if (pCifsFile == NULL) {
/* could not track the handle locally: close it on the server */
419 CIFSSMBClose(xid, tcon, netfid);
424 cifs_fscache_set_inode_cookie(inode, file);
426 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
427 /* time to set mode which we can not set earlier due to
428 problems creating new read-only files */
429 struct cifs_unix_set_info_args args = {
430 .mode = inode->i_mode,
433 .ctime = NO_CHANGE_64,
434 .atime = NO_CHANGE_64,
435 .mtime = NO_CHANGE_64,
438 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
445 cifs_put_tlink(tlink);
449 /* Try to reacquire byte range locks that were released when session */
450 /* to server was lost */
/* NOTE(review): body is a stub in this extract — the BB comment below
   marks the re-locking work as not yet implemented. */
451 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
455 /* BB list all locks open on this file and relock */
/*
 * Re-open a file handle that was invalidated (e.g. by reconnect).
 * Tries a POSIX-extension reopen first when supported, then the NT
 * open path; on success updates the cached netfid and optionally
 * flushes/refreshes inode data (can_flush).  NOTE(review): rc/xid/
 * oplock/netfid declarations, the reopen_success/reopen_error_exit
 * labels, and the closing tail are missing from this extract.
 */
460 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
465 struct cifs_sb_info *cifs_sb;
466 struct cifs_tcon *tcon;
467 struct cifsInodeInfo *pCifsInode;
469 char *full_path = NULL;
471 int disposition = FILE_OPEN;
472 int create_options = CREATE_NOT_DIR;
476 mutex_lock(&pCifsFile->fh_mutex);
/* another thread may have reopened the handle already */
477 if (!pCifsFile->invalidHandle) {
478 mutex_unlock(&pCifsFile->fh_mutex);
484 inode = pCifsFile->dentry->d_inode;
485 cifs_sb = CIFS_SB(inode->i_sb);
486 tcon = tlink_tcon(pCifsFile->tlink);
488 /* can not grab rename sem here because various ops, including
489 those that already have the rename sem can end up causing writepage
490 to get called and if the server was down that means we end up here,
491 and we can never tell if the caller already has the rename_sem */
492 full_path = build_path_from_dentry(pCifsFile->dentry);
493 if (full_path == NULL) {
495 mutex_unlock(&pCifsFile->fh_mutex);
500 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
501 inode, pCifsFile->f_flags, full_path);
508 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
509 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
510 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
513 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
514 * original open. Must mask them off for a reopen.
516 unsigned int oflags = pCifsFile->f_flags &
517 ~(O_CREAT | O_EXCL | O_TRUNC);
/* pinode NULL: do not refresh inode info on a reopen */
519 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
520 cifs_sb->mnt_file_mode /* ignored */,
521 oflags, &oplock, &netfid, xid);
523 cFYI(1, "posix reopen succeeded");
526 /* fallthrough to retry open the old way on errors, especially
527 in the reconnect path it is important to retry hard */
530 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
532 if (backup_cred(cifs_sb))
533 create_options |= CREATE_OPEN_BACKUP_INTENT;
535 /* Can not refresh inode by passing in file_info buf to be returned
536 by SMBOpen and then calling get_inode_info with returned buf
537 since file might have write behind data that needs to be flushed
538 and server version of file size can be stale. If we knew for sure
539 that inode was not dirty locally we could do this */
541 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
542 create_options, &netfid, &oplock, NULL,
543 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
544 CIFS_MOUNT_MAP_SPECIAL_CHR);
546 mutex_unlock(&pCifsFile->fh_mutex);
547 cFYI(1, "cifs_open returned 0x%x", rc);
548 cFYI(1, "oplock: %d", oplock);
549 goto reopen_error_exit;
/* success: publish the new handle and mark it valid again */
553 pCifsFile->netfid = netfid;
554 pCifsFile->invalidHandle = false;
555 mutex_unlock(&pCifsFile->fh_mutex);
556 pCifsInode = CIFS_I(inode);
/* caller allows flushing: write back dirty pages, then refresh
   the inode from the server */
559 rc = filemap_write_and_wait(inode->i_mapping);
560 mapping_set_error(inode->i_mapping, rc);
563 rc = cifs_get_inode_info_unix(&inode,
564 full_path, inode->i_sb, xid);
566 rc = cifs_get_inode_info(&inode,
567 full_path, NULL, inode->i_sb,
569 } /* else we are writing out data to server already
570 and could deadlock if we tried to flush data, and
571 since we do not know if we have data that would
572 invalidate the current end of file on the server
573 we can not go to the server to get the new inod
576 cifs_set_oplock_level(pCifsInode, oplock);
578 cifs_relock_file(pCifsFile);
/*
 * VFS ->release for regular files: drop the reference taken at open
 * time (which may close the server handle — see cifsFileInfo_put).
 */
586 int cifs_close(struct inode *inode, struct file *file)
588 if (file->private_data != NULL) {
589 cifsFileInfo_put(file->private_data);
590 file->private_data = NULL;
593 /* return code from the ->release op is always ignored */
/*
 * VFS ->release for directories: send CIFSFindClose for an uncompleted
 * readdir, free any search-response buffer, and release the private
 * data.  NOTE(review): rc/xid declarations, the ptmp declaration, and
 * several closing braces/returns are missing from this extract.
 */
597 int cifs_closedir(struct inode *inode, struct file *file)
601 struct cifsFileInfo *pCFileStruct = file->private_data;
604 cFYI(1, "Closedir inode = 0x%p", inode);
609 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
611 cFYI(1, "Freeing private data in close dir");
612 spin_lock(&cifs_file_list_lock);
/* search still in progress on the server: close it explicitly */
613 if (!pCFileStruct->srch_inf.endOfSearch &&
614 !pCFileStruct->invalidHandle) {
615 pCFileStruct->invalidHandle = true;
616 spin_unlock(&cifs_file_list_lock);
617 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
618 cFYI(1, "Closing uncompleted readdir with rc %d",
620 /* not much we can do if it fails anyway, ignore rc */
623 spin_unlock(&cifs_file_list_lock);
624 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
626 cFYI(1, "closedir free smb buf in srch struct");
627 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
/* release via the matching allocator (small vs. regular buf) */
628 if (pCFileStruct->srch_inf.smallBuf)
629 cifs_small_buf_release(ptmp);
631 cifs_buf_release(ptmp);
633 cifs_put_tlink(pCFileStruct->tlink);
634 kfree(file->private_data);
635 file->private_data = NULL;
637 /* BB can we lock the filestruct while this is going on? */
/*
 * Record a byte-range lock locally on the open file's lock list so it
 * can be found (and released) later.  NOTE(review): the ENOMEM check,
 * the assignments of offset/length/type into li, and the return are
 * missing from this extract.
 */
642 static int store_file_lock(struct cifsFileInfo *cfile, __u64 len,
643 __u64 offset, __u8 type, __u16 netfid)
645 struct cifsLockInfo *li =
646 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
652 li->pid = current->tgid;
653 mutex_lock(&cfile->lock_mutex);
654 list_add_tail(&li->llist, &cfile->llist);
655 mutex_unlock(&cfile->lock_mutex);
/*
 * Decode a struct file_lock into CIFS locking parameters: lock type,
 * whether to lock or unlock, and whether to block waiting.
 * NOTE(review): the return type line and several assignment lines
 * (e.g. *lock/*unlock settings per fl_type) are missing from this
 * extract.
 */
660 cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock,
663 if (flock->fl_flags & FL_POSIX)
665 if (flock->fl_flags & FL_FLOCK)
667 if (flock->fl_flags & FL_SLEEP) {
668 cFYI(1, "Blocking lock");
671 if (flock->fl_flags & FL_ACCESS)
672 cFYI(1, "Process suspended by mandatory locking - "
673 "not implemented yet");
674 if (flock->fl_flags & FL_LEASE)
675 cFYI(1, "Lease on file - not implemented yet");
676 if (flock->fl_flags &
677 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
678 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
/* all locks are sent as large-file (64-bit range) locks */
680 *type = LOCKING_ANDX_LARGE_FILES;
681 if (flock->fl_type == F_WRLCK) {
684 } else if (flock->fl_type == F_UNLCK) {
687 /* Check if unlock includes more than one lock range */
688 } else if (flock->fl_type == F_RDLCK) {
690 *type |= LOCKING_ANDX_SHARED_LOCK;
692 } else if (flock->fl_type == F_EXLCK) {
695 } else if (flock->fl_type == F_SHLCK) {
697 *type |= LOCKING_ANDX_SHARED_LOCK;
700 cFYI(1, "Unknown type of lock");
/*
 * Service F_GETLK: probe whether a conflicting lock exists.  For POSIX
 * servers, ask directly via CIFSSMBPosixLock; otherwise probe by
 * attempting a lock and immediately unlocking on success.
 * NOTE(review): the return type, rc declaration, several conditional
 * lines and the final return are missing from this extract.
 */
704 cifs_getlk(struct cifsFileInfo *cfile, struct file_lock *flock, __u8 type,
705 bool wait_flag, bool posix_lck, int xid)
708 __u64 length = 1 + flock->fl_end - flock->fl_start;
709 __u16 netfid = cfile->netfid;
710 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
714 if (type & LOCKING_ANDX_SHARED_LOCK)
715 posix_lock_type = CIFS_RDLCK;
717 posix_lock_type = CIFS_WRLCK;
718 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
719 length, flock, posix_lock_type,
724 /* BB we could chain these into one lock request BB */
/* try to take the lock; if it succeeds there is no conflict,
   so undo it and report F_UNLCK */
725 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
726 flock->fl_start, 0, 1, type, 0, 0);
728 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
729 length, flock->fl_start, 1, 0,
731 flock->fl_type = F_UNLCK;
733 cERROR(1, "Error unlocking previously locked "
734 "range %d during test of lock", rc);
/* exclusive probe failed: retry with a shared probe to tell a
   read-lock conflict from a write-lock conflict */
739 if (type & LOCKING_ANDX_SHARED_LOCK) {
740 flock->fl_type = F_WRLCK;
745 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
746 flock->fl_start, 0, 1,
747 type | LOCKING_ANDX_SHARED_LOCK, 0, 0);
749 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
750 length, flock->fl_start, 1, 0,
751 type | LOCKING_ANDX_SHARED_LOCK,
753 flock->fl_type = F_RDLCK;
755 cERROR(1, "Error unlocking previously locked "
756 "range %d during test of lock", rc);
758 flock->fl_type = F_WRLCK;
/*
 * Service F_SETLK/F_SETLKW: set or clear a byte-range lock, via the
 * POSIX lock call when available, otherwise Windows-style locks which
 * must additionally be recorded locally (store_file_lock) so unlocks
 * can find them.  NOTE(review): return type, rc/stored_rc declarations,
 * several conditionals and the final return are missing from this
 * extract.
 */
765 cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
766 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
769 __u64 length = 1 + flock->fl_end - flock->fl_start;
770 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
771 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
772 __u16 netfid = cfile->netfid;
776 if (type & LOCKING_ANDX_SHARED_LOCK)
777 posix_lock_type = CIFS_RDLCK;
779 posix_lock_type = CIFS_WRLCK;
782 posix_lock_type = CIFS_UNLCK;
784 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */, length,
785 flock, posix_lock_type, wait_flag);
790 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
791 flock->fl_start, 0, lock, type, wait_flag, 0);
793 /* For Windows locks we must store them. */
794 rc = store_file_lock(cfile, length, flock->fl_start,
799 * For each stored lock that this unlock overlaps completely,
803 struct cifsLockInfo *li, *tmp;
805 mutex_lock(&cfile->lock_mutex);
806 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
/* skip stored locks not fully contained in the unlock range */
807 if (flock->fl_start > li->offset ||
808 (flock->fl_start + length) <
809 (li->offset + li->length))
811 if (current->tgid != li->pid)
814 stored_rc = CIFSSMBLock(xid, tcon, netfid,
815 current->tgid, li->length,
816 li->offset, 1, 0, li->type,
821 list_del(&li->llist);
825 mutex_unlock(&cfile->lock_mutex);
/* keep the local VFS lock state in sync for POSIX locks */
828 if (flock->fl_flags & FL_POSIX)
829 posix_lock_file_wait(file, flock);
/*
 * VFS ->lock entry point: decode the request (cifs_read_flock), decide
 * whether POSIX-extension locking applies, then dispatch to cifs_getlk
 * or cifs_setlk.  NOTE(review): rc/xid/type/netfid declarations, the
 * IS_GETLK branch structure and the FreeXid/return tail are missing
 * from this extract.
 */
833 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
836 int lock = 0, unlock = 0;
837 bool wait_flag = false;
838 bool posix_lck = false;
839 struct cifs_sb_info *cifs_sb;
840 struct cifs_tcon *tcon;
841 struct cifsInodeInfo *cinode;
842 struct cifsFileInfo *cfile;
849 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
850 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
851 flock->fl_start, flock->fl_end);
853 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag);
855 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
856 cfile = (struct cifsFileInfo *)file->private_data;
857 tcon = tlink_tcon(cfile->tlink);
858 netfid = cfile->netfid;
859 cinode = CIFS_I(file->f_path.dentry->d_inode);
/* POSIX locking requires the capability and must not be disabled
   by the nobrl/noposixbrl mount option */
861 if ((tcon->ses->capabilities & CAP_UNIX) &&
862 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
863 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
866 * BB add code here to normalize offset and length to account for
867 * negative length which we can not accept over the wire.
870 rc = cifs_getlk(cfile, flock, type, wait_flag, posix_lck, xid);
875 if (!lock && !unlock) {
877 * if no lock or unlock then nothing to do since we do not
884 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
890 /* update the file size (if needed) after a write */
/* Advance the cached server end-of-file if this write extended it. */
892 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
893 unsigned int bytes_written)
895 loff_t end_of_write = offset + bytes_written;
897 if (end_of_write > cifsi->server_eof)
898 cifsi->server_eof = end_of_write;
/*
 * Write write_size bytes from write_data to the server at *poffset,
 * retrying with handle reopen on -EAGAIN, and update the cached EOF
 * and inode size as data goes out.  Returns the total bytes written.
 * NOTE(review): rc/xid/iov/len declarations, the -EAGAIN retry break
 * conditions, and some loop braces are missing from this extract.
 */
901 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
902 const char *write_data, size_t write_size,
906 unsigned int bytes_written = 0;
907 unsigned int total_written;
908 struct cifs_sb_info *cifs_sb;
909 struct cifs_tcon *pTcon;
911 struct dentry *dentry = open_file->dentry;
912 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
913 struct cifs_io_parms io_parms;
915 cifs_sb = CIFS_SB(dentry->d_sb);
917 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
918 *poffset, dentry->d_name.name);
920 pTcon = tlink_tcon(open_file->tlink);
/* outer loop advances through the buffer in wsize-sized chunks */
924 for (total_written = 0; write_size > total_written;
925 total_written += bytes_written) {
927 while (rc == -EAGAIN) {
931 if (open_file->invalidHandle) {
932 /* we could deadlock if we called
933 filemap_fdatawait from here so tell
934 reopen_file not to flush data to
936 rc = cifs_reopen_file(open_file, false);
/* chunk size is bounded by the negotiated wsize */
941 len = min((size_t)cifs_sb->wsize,
942 write_size - total_written);
943 /* iov[0] is reserved for smb header */
944 iov[1].iov_base = (char *)write_data + total_written;
945 iov[1].iov_len = len;
946 io_parms.netfid = open_file->netfid;
948 io_parms.tcon = pTcon;
949 io_parms.offset = *poffset;
950 io_parms.length = len;
951 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
954 if (rc || (bytes_written == 0)) {
/* successful chunk: advance offset and cached server EOF */
962 cifs_update_eof(cifsi, *poffset, bytes_written);
963 *poffset += bytes_written;
967 cifs_stats_bytes_written(pTcon, total_written);
969 if (total_written > 0) {
970 spin_lock(&dentry->d_inode->i_lock);
971 if (*poffset > dentry->d_inode->i_size)
972 i_size_write(dentry->d_inode, *poffset);
973 spin_unlock(&dentry->d_inode->i_lock);
975 mark_inode_dirty_sync(dentry->d_inode);
977 return total_written;
/*
 * Find an open file instance on this inode usable for reading, taking
 * a reference on it (caller must cifsFileInfo_put).  Optionally filters
 * by the current fsuid on multiuser mounts.  NOTE(review): the
 * fsuid_only parameter line, a fsuid_only reset, the returns inside the
 * loop and the final NULL return are missing from this extract.
 */
980 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
983 struct cifsFileInfo *open_file = NULL;
984 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
986 /* only filter by fsuid on multiuser mounts */
987 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
990 spin_lock(&cifs_file_list_lock);
991 /* we could simply get the first_list_entry since write-only entries
992 are always at the end of the list but since the first entry might
993 have a close pending, we go through the whole list */
994 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
995 if (fsuid_only && open_file->uid != current_fsuid())
997 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
998 if (!open_file->invalidHandle) {
999 /* found a good file */
1000 /* lock it so it will not be closed on us */
1001 cifsFileInfo_get(open_file);
1002 spin_unlock(&cifs_file_list_lock);
1004 } /* else might as well continue, and look for
1005 another, or simply have the caller reopen it
1006 again rather than trying to fix this handle */
1007 } else /* write only file */
1008 break; /* write only files are last so must be done */
1010 spin_unlock(&cifs_file_list_lock);
/*
 * Find (and take a reference on) an open file instance usable for
 * writing, preferring a handle owned by the current tgid and reopening
 * an invalidated handle if needed; falls back to any available handle
 * via the refind_writable retry.  NOTE(review): the fsuid_only
 * parameter line, rc declaration, the refind_writable label, returns
 * and the final NULL return are missing from this extract.
 */
1014 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1017 struct cifsFileInfo *open_file;
1018 struct cifs_sb_info *cifs_sb;
1019 bool any_available = false;
1022 /* Having a null inode here (because mapping->host was set to zero by
1023 the VFS or MM) should not happen but we had reports of on oops (due to
1024 it being zero) during stress testcases so we need to check for it */
1026 if (cifs_inode == NULL) {
1027 cERROR(1, "Null inode passed to cifs_writeable_file");
1032 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1034 /* only filter by fsuid on multiuser mounts */
1035 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1038 spin_lock(&cifs_file_list_lock);
1040 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* first pass considers only handles from the current tgid */
1041 if (!any_available && open_file->pid != current->tgid)
1043 if (fsuid_only && open_file->uid != current_fsuid())
1045 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1046 cifsFileInfo_get(open_file);
1048 if (!open_file->invalidHandle) {
1049 /* found a good writable file */
1050 spin_unlock(&cifs_file_list_lock);
1054 spin_unlock(&cifs_file_list_lock);
1056 /* Had to unlock since following call can block */
1057 rc = cifs_reopen_file(open_file, false);
1061 /* if it fails, try another handle if possible */
1062 cFYI(1, "wp failed on reopen file");
1063 cifsFileInfo_put(open_file);
1065 spin_lock(&cifs_file_list_lock);
1067 /* else we simply continue to the next entry. Thus
1068 we do not loop on reopen errors. If we
1069 can not reopen the file, for example if we
1070 reconnected to a server with another client
1071 racing to delete or lock the file we would not
1072 make progress if we restarted before the beginning
1073 of the loop here. */
1076 /* couldn't find useable FH with same pid, try any available */
1077 if (!any_available) {
1078 any_available = true;
1079 goto refind_writable;
1081 spin_unlock(&cifs_file_list_lock);
/*
 * Write the byte range [from, to) of a page cache page to the server,
 * clamping against the current inode size (racing truncate/extend).
 * NOTE(review): rc/write_data declarations, kunmap calls, some error
 * branches and the final return are missing from this extract.
 */
1085 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1087 struct address_space *mapping = page->mapping;
1088 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1091 int bytes_written = 0;
1092 struct inode *inode;
1093 struct cifsFileInfo *open_file;
1095 if (!mapping || !mapping->host)
1098 inode = page->mapping->host;
1100 offset += (loff_t)from;
1101 write_data = kmap(page);
1104 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1109 /* racing with truncate? */
1110 if (offset > mapping->host->i_size) {
1112 return 0; /* don't care */
1115 /* check to make sure that we are not extending the file */
1116 if (mapping->host->i_size - offset < (loff_t)to)
1117 to = (unsigned)(mapping->host->i_size - offset);
1119 open_file = find_writable_file(CIFS_I(mapping->host), false);
1121 bytes_written = cifs_write(open_file, open_file->pid,
1122 write_data, to - from, &offset);
1123 cifsFileInfo_put(open_file);
1124 /* Does mm or vfs already set times? */
1125 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1126 if ((bytes_written > 0) && (offset))
1128 else if (bytes_written < 0)
1131 cFYI(1, "No writeable filehandles for inode");
/*
 * VFS ->writepages: batch dirty pages into a cifs_writedata and send
 * them with an async write (cifs_async_writev), falling back to
 * generic_writepages when wsize is below one page.  Mirrors the shape
 * of write_cache_pages: find dirty pages, lock/validate each, mark
 * writeback, send, then unlock and account.  NOTE(review): numerous
 * lines are missing from this extract (rc/index/end/page declarations,
 * ENOMEM handling, several continue/break statements, the retry label,
 * and loop braces), so control flow shown here is incomplete.
 */
1139 static int cifs_writepages(struct address_space *mapping,
1140 struct writeback_control *wbc)
1142 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1143 bool done = false, scanned = false, range_whole = false;
1145 struct cifs_writedata *wdata;
1150 * If wsize is smaller than the page cache size, default to writing
1151 * one page at a time via cifs_writepage
1153 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1154 return generic_writepages(mapping, wbc);
1156 if (wbc->range_cyclic) {
1157 index = mapping->writeback_index; /* Start from prev offset */
1160 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1161 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1162 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1167 while (!done && index <= end) {
1168 unsigned int i, nr_pages, found_pages;
1169 pgoff_t next = 0, tofind;
1170 struct page **pages;
/* batch size limited by wsize (minus one slot, per original code) */
1172 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1175 wdata = cifs_writedata_alloc((unsigned int)tofind);
1182 * find_get_pages_tag seems to return a max of 256 on each
1183 * iteration, so we must call it several times in order to
1184 * fill the array or the wsize is effectively limited to
1185 * 256 * PAGE_CACHE_SIZE.
1188 pages = wdata->pages;
1190 nr_pages = find_get_pages_tag(mapping, &index,
1191 PAGECACHE_TAG_DIRTY,
1193 found_pages += nr_pages;
1196 } while (nr_pages && tofind && index <= end);
1198 if (found_pages == 0) {
1199 kref_put(&wdata->refcount, cifs_writedata_release);
/* validate each found page: still dirty, still ours, contiguous */
1204 for (i = 0; i < found_pages; i++) {
1205 page = wdata->pages[i];
1207 * At this point we hold neither mapping->tree_lock nor
1208 * lock on the page itself: the page may be truncated or
1209 * invalidated (changing page->mapping to NULL), or even
1210 * swizzled back from swapper_space to tmpfs file
1216 else if (!trylock_page(page))
1219 if (unlikely(page->mapping != mapping)) {
1224 if (!wbc->range_cyclic && page->index > end) {
1230 if (next && (page->index != next)) {
1231 /* Not next consecutive page */
1236 if (wbc->sync_mode != WB_SYNC_NONE)
1237 wait_on_page_writeback(page);
1239 if (PageWriteback(page) ||
1240 !clear_page_dirty_for_io(page)) {
1246 * This actually clears the dirty bit in the radix tree.
1247 * See cifs_writepage() for more commentary.
1249 set_page_writeback(page);
1251 if (page_offset(page) >= mapping->host->i_size) {
1254 end_page_writeback(page);
1258 wdata->pages[i] = page;
1259 next = page->index + 1;
1263 /* reset index to refind any pages skipped */
1265 index = wdata->pages[0]->index + 1;
1267 /* put any pages we aren't going to use */
1268 for (i = nr_pages; i < found_pages; i++) {
1269 page_cache_release(wdata->pages[i]);
1270 wdata->pages[i] = NULL;
1273 /* nothing to write? */
1274 if (nr_pages == 0) {
1275 kref_put(&wdata->refcount, cifs_writedata_release);
1279 wdata->sync_mode = wbc->sync_mode;
1280 wdata->nr_pages = nr_pages;
1281 wdata->offset = page_offset(wdata->pages[0]);
/* (re)acquire a writable handle for each send attempt */
1284 if (wdata->cfile != NULL)
1285 cifsFileInfo_put(wdata->cfile);
1286 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1288 if (!wdata->cfile) {
1289 cERROR(1, "No writable handles for inode");
1293 rc = cifs_async_writev(wdata);
1294 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1296 for (i = 0; i < nr_pages; ++i)
1297 unlock_page(wdata->pages[i]);
1299 /* send failure -- clean up the mess */
1301 for (i = 0; i < nr_pages; ++i) {
1303 redirty_page_for_writepage(wbc,
1306 SetPageError(wdata->pages[i]);
1307 end_page_writeback(wdata->pages[i]);
1308 page_cache_release(wdata->pages[i]);
1311 mapping_set_error(mapping, rc);
1313 kref_put(&wdata->refcount, cifs_writedata_release);
1315 wbc->nr_to_write -= nr_pages;
1316 if (wbc->nr_to_write <= 0)
1322 if (!scanned && !done) {
1324 * We hit the last page and there is more work to be done: wrap
1325 * back to the start of the file
1332 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1333 mapping->writeback_index = index;
/*
 * Write a single locked page synchronously via cifs_partialpagewrite,
 * handling -EAGAIN by retrying (WB_SYNC_ALL) or redirtying the page.
 * NOTE(review): the return type line, rc declaration, retry label and
 * final return are missing from this extract.
 */
1339 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1345 /* BB add check for wbc flags */
1346 page_cache_get(page);
1347 if (!PageUptodate(page))
1348 cFYI(1, "ppw - page not up to date");
1351 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1353 * A writepage() implementation always needs to do either this,
1354 * or re-dirty the page with "redirty_page_for_writepage()" in
1355 * the case of a failure.
1357 * Just unlocking the page will cause the radix tree tag-bits
1358 * to fail to update with the state of the page correctly.
1360 set_page_writeback(page);
1362 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1363 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1365 else if (rc == -EAGAIN)
1366 redirty_page_for_writepage(wbc, page);
1370 SetPageUptodate(page);
1371 end_page_writeback(page);
1372 page_cache_release(page);
/*
 * ->writepage a_op: delegates to cifs_writepage_locked() (which expects
 * the page locked, as the VM guarantees for ->writepage).
 */
1377 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1379 int rc = cifs_writepage_locked(page, wbc);
/*
 * ->write_end a_op: commit @copied bytes at @pos that cifs_write_begin
 * prepared.  If the page cannot be marked uptodate, fall back to a
 * synchronous cifs_write() of just the copied range using this file's
 * handle.  Updates i_size under i_lock if the write extended the file.
 */
1384 static int cifs_write_end(struct file *file, struct address_space *mapping,
1385 loff_t pos, unsigned len, unsigned copied,
1386 struct page *page, void *fsdata)
1389 struct inode *inode = mapping->host;
1390 struct cifsFileInfo *cfile = file->private_data;
1391 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* pick pid for the wire op: forwarded opener pid vs. current tgid */
1394 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1397 pid = current->tgid;
1399 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
/* PageChecked was set by write_begin: untouched parts count as uptodate */
1402 if (PageChecked(page)) {
1404 SetPageUptodate(page);
1405 ClearPageChecked(page);
1406 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1407 SetPageUptodate(page);
/* partial copy into a non-uptodate page: must write through synchronously */
1409 if (!PageUptodate(page)) {
1411 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1415 /* this is probably better than directly calling
1416 partialpage_write since in this function the file handle is
1417 known which we might as well leverage */
1418 /* BB check if anything else missing out of ppw
1419 such as updating last write time */
1420 page_data = kmap(page);
1421 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
1422 /* if (rc < 0) should we set writebehind rc? */
/* uptodate page: just dirty it and let writeback push it out later */
1429 set_page_dirty(page);
1433 spin_lock(&inode->i_lock);
1434 if (pos > inode->i_size)
1435 i_size_write(inode, pos);
1436 spin_unlock(&inode->i_lock);
1440 page_cache_release(page);
/*
 * Strict-cache fsync: flush dirty pages in [start, end], invalidate the
 * mapping if we do not hold a read oplock (clientCanCacheRead), then ask
 * the server to flush via CIFSSMBFlush() unless mounted with NOSSYNC.
 * Runs under i_mutex.
 */
1445 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
1450 struct cifs_tcon *tcon;
1451 struct cifsFileInfo *smbfile = file->private_data;
1452 struct inode *inode = file->f_path.dentry->d_inode;
1453 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1455 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1458 mutex_lock(&inode->i_mutex);
1462 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1463 file->f_path.dentry->d_name.name, datasync);
/* no read oplock: cached pages may be stale, drop them */
1465 if (!CIFS_I(inode)->clientCanCacheRead) {
1466 rc = cifs_invalidate_mapping(inode);
1468 cFYI(1, "rc: %d during invalidate phase", rc);
1469 rc = 0; /* don't care about it in fsync */
1473 tcon = tlink_tcon(smbfile->tlink);
1474 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1475 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1478 mutex_unlock(&inode->i_mutex);
/*
 * Non-strict fsync: like cifs_strict_fsync() but without the mapping
 * invalidation step — flush the range, then CIFSSMBFlush() to the server
 * unless NOSSYNC is set.  Runs under i_mutex.
 */
1482 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1486 struct cifs_tcon *tcon;
1487 struct cifsFileInfo *smbfile = file->private_data;
1488 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1489 struct inode *inode = file->f_mapping->host;
1491 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1494 mutex_lock(&inode->i_mutex);
1498 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1499 file->f_path.dentry->d_name.name, datasync);
1501 tcon = tlink_tcon(smbfile->tlink);
1502 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1503 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1506 mutex_unlock(&inode->i_mutex);
1511 * As file closes, flush all cached write data for this inode checking
1512 * for write behind errors.
1514 int cifs_flush(struct file *file, fl_owner_t id)
1516 struct inode *inode = file->f_path.dentry->d_inode;
/* only writable opens can have write-behind data worth flushing */
1519 if (file->f_mode & FMODE_WRITE)
1520 rc = filemap_write_and_wait(inode->i_mapping);
1522 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * Allocate @num_pages highmem pages into @pages[] for a buffered iovec
 * write.  On allocation failure the cleanup path (partially elided here)
 * frees what was allocated and returns ENOMEM.
 */
1528 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
1533 for (i = 0; i < num_pages; i++) {
1534 pages[i] = alloc_page(__GFP_HIGHMEM);
1537 * save number of pages we have already allocated and
1538 * return with ENOMEM error
1549 for (i = 0; i < num_pages; i++)
/*
 * Clamp @len to @wsize, store the clamped length in *cur_len, and return
 * the number of pages needed to hold it (rounding up a partial page).
 */
1555 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
1560 clen = min_t(const size_t, len, wsize);
1561 num_pages = clen / PAGE_CACHE_SIZE;
1562 if (clen % PAGE_CACHE_SIZE)
/*
 * Uncached iovec write: copy user data into temporary pages, then send it
 * to the server with CIFSSMBWrite2() in wsize-bounded chunks, retrying on
 * -EAGAIN (reopening an invalidated handle first).  Updates *poffset and
 * the cached EOF/i_size as data is written.  Returns total bytes written
 * or a negative error.
 */
1572 cifs_iovec_write(struct file *file, const struct iovec *iov,
1573 unsigned long nr_segs, loff_t *poffset)
1575 unsigned int written;
1576 unsigned long num_pages, npages, i;
1577 size_t copied, len, cur_len;
1578 ssize_t total_written = 0;
1579 struct kvec *to_send;
1580 struct page **pages;
1582 struct inode *inode;
1583 struct cifsFileInfo *open_file;
1584 struct cifs_tcon *pTcon;
1585 struct cifs_sb_info *cifs_sb;
1586 struct cifs_io_parms io_parms;
1590 len = iov_length(iov, nr_segs);
1594 rc = generic_write_checks(file, poffset, &len, 0);
1598 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1599 num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
/*
 * BUGFIX: was sizeof(struct pages *) — "struct pages" does not exist;
 * it only worked because all object pointers have the same size.  Use
 * the element type of the array actually being allocated.
 */
1601 pages = kmalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
1605 to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
1611 rc = cifs_write_allocate_pages(pages, num_pages);
1619 open_file = file->private_data;
/* pid for the wire op: forwarded opener pid vs. current tgid */
1621 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1622 pid = open_file->pid;
1624 pid = current->tgid;
1626 pTcon = tlink_tcon(open_file->tlink);
1627 inode = file->f_path.dentry->d_inode;
1629 iov_iter_init(&it, iov, nr_segs, len, 0);
/* fill the temp pages from the user iovec; to_send[0] holds the header */
1633 size_t save_len = cur_len;
1634 for (i = 0; i < npages; i++) {
1635 copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
1636 copied = iov_iter_copy_from_user(pages[i], &it, 0,
1639 iov_iter_advance(&it, copied);
1640 to_send[i+1].iov_base = kmap(pages[i]);
1641 to_send[i+1].iov_len = copied;
1644 cur_len = save_len - cur_len;
/* retry the send while the server asks us to back off (-EAGAIN) */
1647 if (open_file->invalidHandle) {
1648 rc = cifs_reopen_file(open_file, false);
1652 io_parms.netfid = open_file->netfid;
1654 io_parms.tcon = pTcon;
1655 io_parms.offset = *poffset;
1656 io_parms.length = cur_len;
1657 rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
1659 } while (rc == -EAGAIN);
1661 for (i = 0; i < npages; i++)
1666 total_written += written;
1667 cifs_update_eof(CIFS_I(inode), *poffset, written);
1668 *poffset += written;
1669 } else if (rc < 0) {
1675 /* get length and number of kvecs of the next write */
1676 npages = get_numpages(cifs_sb->wsize, len, &cur_len);
/* extend cached i_size if we wrote past it */
1679 if (total_written > 0) {
1680 spin_lock(&inode->i_lock);
1681 if (*poffset > inode->i_size)
1682 i_size_write(inode, *poffset);
1683 spin_unlock(&inode->i_lock);
1686 cifs_stats_bytes_written(pTcon, total_written);
1687 mark_inode_dirty_sync(inode);
1689 for (i = 0; i < num_pages; i++)
1694 return total_written;
/*
 * aio write entry for uncached I/O: forwards to cifs_iovec_write() and,
 * on success, marks the inode mapping invalid so cached reads refetch.
 */
1697 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1698 unsigned long nr_segs, loff_t pos)
1701 struct inode *inode;
1703 inode = iocb->ki_filp->f_path.dentry->d_inode;
1706 * BB - optimize the way when signing is disabled. We can drop this
1707 * extra memory-to-memory copying and use iovec buffers for constructing
1711 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
1713 CIFS_I(inode)->invalid_mapping = true;
/*
 * Strict-cache aio write: with a full oplock (clientCanCacheAll) use the
 * generic cached path; otherwise write through via cifs_user_writev().
 */
1720 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
1721 unsigned long nr_segs, loff_t pos)
1723 struct inode *inode;
1725 inode = iocb->ki_filp->f_path.dentry->d_inode;
1727 if (CIFS_I(inode)->clientCanCacheAll)
1728 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1731 * In strict cache mode we need to write the data to the server exactly
1732 * from the pos to pos+len-1 rather than flush all affected pages
1733 * because it may cause a error with mandatory locks on these pages but
1734 * not on the region from pos to ppos+len-1.
1737 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * Uncached iovec read: issue CIFSSMBRead() in rsize-bounded chunks,
 * copying each SMB response payload straight into the user iovec with
 * memcpy_toiovecend().  Retries -EAGAIN (reopening an invalidated
 * handle), stops at error or EOF (zero bytes), and advances *poffset.
 */
1741 cifs_iovec_read(struct file *file, const struct iovec *iov,
1742 unsigned long nr_segs, loff_t *poffset)
1747 unsigned int bytes_read = 0;
1748 size_t len, cur_len;
1750 struct cifs_sb_info *cifs_sb;
1751 struct cifs_tcon *pTcon;
1752 struct cifsFileInfo *open_file;
1753 struct smb_com_read_rsp *pSMBr;
1754 struct cifs_io_parms io_parms;
1761 len = iov_length(iov, nr_segs);
1766 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1768 open_file = file->private_data;
1769 pTcon = tlink_tcon(open_file->tlink);
/* pid for the wire op: forwarded opener pid vs. current tgid */
1771 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1772 pid = open_file->pid;
1774 pid = current->tgid;
1776 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1777 cFYI(1, "attempting read on write only file instance");
1779 for (total_read = 0; total_read < len; total_read += bytes_read) {
1780 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
1784 while (rc == -EAGAIN) {
1785 int buf_type = CIFS_NO_BUFFER;
1786 if (open_file->invalidHandle) {
1787 rc = cifs_reopen_file(open_file, true);
1791 io_parms.netfid = open_file->netfid;
1793 io_parms.tcon = pTcon;
1794 io_parms.offset = *poffset;
1795 io_parms.length = cur_len;
1796 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1797 &read_data, &buf_type);
1798 pSMBr = (struct smb_com_read_rsp *)read_data;
/* payload sits past the 4-byte RFC1001 header at DataOffset */
1800 char *data_offset = read_data + 4 +
1801 le16_to_cpu(pSMBr->DataOffset);
1802 if (memcpy_toiovecend(iov, data_offset,
1803 iov_offset, bytes_read))
/* release whichever SMB buffer type the transport handed back */
1805 if (buf_type == CIFS_SMALL_BUFFER)
1806 cifs_small_buf_release(read_data);
1807 else if (buf_type == CIFS_LARGE_BUFFER)
1808 cifs_buf_release(read_data);
1810 iov_offset += bytes_read;
1814 if (rc || (bytes_read == 0)) {
1822 cifs_stats_bytes_read(pTcon, bytes_read);
1823 *poffset += bytes_read;
/* aio read entry for uncached I/O: thin wrapper over cifs_iovec_read(). */
1831 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1832 unsigned long nr_segs, loff_t pos)
1836 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * Strict-cache aio read: with a read oplock (clientCanCacheRead) use the
 * generic cached path; otherwise read through via cifs_user_readv().
 */
1843 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
1844 unsigned long nr_segs, loff_t pos)
1846 struct inode *inode;
1848 inode = iocb->ki_filp->f_path.dentry->d_inode;
1850 if (CIFS_I(inode)->clientCanCacheRead)
1851 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1854 * In strict cache mode we need to read from the server all the time
1855 * if we don't have level II oplock because the server can delay mtime
1856 * change - so we can't make a decision about inode invalidating.
1857 * And we can also fail with pagereading if there are mandatory locks
1858 * on pages affected by this read but not on the region from pos to
1862 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * Synchronous read of @read_size bytes at *poffset into @read_data,
 * looping over rsize-bounded CIFSSMBRead() calls and retrying -EAGAIN
 * (reopening an invalidated handle).  Advances *poffset by what was read.
 */
1865 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1869 unsigned int bytes_read = 0;
1870 unsigned int total_read;
1871 unsigned int current_read_size;
1872 struct cifs_sb_info *cifs_sb;
1873 struct cifs_tcon *pTcon;
1875 char *current_offset;
1876 struct cifsFileInfo *open_file;
1877 struct cifs_io_parms io_parms;
1878 int buf_type = CIFS_NO_BUFFER;
1882 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1884 if (file->private_data == NULL) {
1889 open_file = file->private_data;
1890 pTcon = tlink_tcon(open_file->tlink);
/* pid for the wire op: forwarded opener pid vs. current tgid */
1892 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1893 pid = open_file->pid;
1895 pid = current->tgid;
1897 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1898 cFYI(1, "attempting read on write only file instance");
1900 for (total_read = 0, current_offset = read_data;
1901 read_size > total_read;
1902 total_read += bytes_read, current_offset += bytes_read) {
1903 current_read_size = min_t(const int, read_size - total_read,
1905 /* For windows me and 9x we do not want to request more
1906 than it negotiated since it will refuse the read then */
1908 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1909 current_read_size = min_t(const int, current_read_size,
1913 while (rc == -EAGAIN) {
1914 if (open_file->invalidHandle) {
1915 rc = cifs_reopen_file(open_file, true);
1919 io_parms.netfid = open_file->netfid;
1921 io_parms.tcon = pTcon;
1922 io_parms.offset = *poffset;
1923 io_parms.length = current_read_size;
1924 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1925 &current_offset, &buf_type);
1927 if (rc || (bytes_read == 0)) {
1935 cifs_stats_bytes_read(pTcon, total_read);
1936 *poffset += bytes_read;
1944 * If the page is mmap'ed into a process' page tables, then we need to make
1945 * sure that it doesn't change while being written back.
1948 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1950 struct page *page = vmf->page;
/* VM_FAULT_LOCKED: the page is returned still locked to the fault handler */
1953 return VM_FAULT_LOCKED;
/*
 * mmap operations for cifs files: reads fault through the generic page
 * cache path; write faults go through cifs_page_mkwrite() so writeback
 * can keep the page stable.  Declared const: the table is never modified
 * and vm_area_struct.vm_ops is a pointer-to-const, so this moves it to
 * read-only data.
 */
1956 static const struct vm_operations_struct cifs_file_vm_ops = {
1957 .fault = filemap_fault,
1958 .page_mkwrite = cifs_page_mkwrite,
/*
 * Strict-cache mmap: invalidate the mapping first when we hold no read
 * oplock, then do a generic mmap and install cifs_file_vm_ops.
 */
1961 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1964 struct inode *inode = file->f_path.dentry->d_inode;
1968 if (!CIFS_I(inode)->clientCanCacheRead) {
1969 rc = cifs_invalidate_mapping(inode);
1974 rc = generic_file_mmap(file, vma);
1976 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Non-strict mmap: revalidate the file against the server first, then do
 * a generic mmap and install cifs_file_vm_ops.
 */
1981 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1986 rc = cifs_revalidate_file(file);
1988 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1992 rc = generic_file_mmap(file, vma);
1994 vma->vm_ops = &cifs_file_vm_ops;
/*
 * Distribute @bytes_read of SMB payload at @data into the pages on
 * @pages (consumed from the tail), adding each page to the page cache
 * and LRU.  A final partial page has its tail zeroed; each filled page
 * is marked uptodate and pushed to FS-Cache.
 */
2000 static void cifs_copy_cache_pages(struct address_space *mapping,
2001 struct list_head *pages, int bytes_read, char *data)
2006 while (bytes_read > 0) {
2007 if (list_empty(pages))
2010 page = list_entry(pages->prev, struct page, lru);
2011 list_del(&page->lru);
/* if insertion fails, drop the page but still advance past its data */
2013 if (add_to_page_cache_lru(page, mapping, page->index,
2015 page_cache_release(page);
2016 cFYI(1, "Add page cache failed");
2017 data += PAGE_CACHE_SIZE;
2018 bytes_read -= PAGE_CACHE_SIZE;
/* drop the extra reference taken by add_to_page_cache_lru */
2021 page_cache_release(page);
2023 target = kmap_atomic(page, KM_USER0);
2025 if (PAGE_CACHE_SIZE > bytes_read) {
2026 memcpy(target, data, bytes_read);
2027 /* zero the tail end of this partial page */
2028 memset(target + bytes_read, 0,
2029 PAGE_CACHE_SIZE - bytes_read);
2032 memcpy(target, data, PAGE_CACHE_SIZE);
2033 bytes_read -= PAGE_CACHE_SIZE;
2035 kunmap_atomic(target, KM_USER0);
2037 flush_dcache_page(page);
2038 SetPageUptodate(page);
2040 data += PAGE_CACHE_SIZE;
2042 /* add page to FS-Cache */
2043 cifs_readpage_to_fscache(mapping->host, page);
/*
 * ->readpages a_op: try FS-Cache first, then read runs of contiguous
 * pages from the server in rsize-bounded chunks via CIFSSMBRead() and
 * scatter the payload into the page cache with cifs_copy_cache_pages().
 * Retries -EAGAIN (reopening an invalidated handle, releasing any stale
 * response buffer first); stops on error or a zero-byte read.
 */
2048 static int cifs_readpages(struct file *file, struct address_space *mapping,
2049 struct list_head *page_list, unsigned num_pages)
2055 struct cifs_sb_info *cifs_sb;
2056 struct cifs_tcon *pTcon;
2057 unsigned int bytes_read = 0;
2058 unsigned int read_size, i;
2059 char *smb_read_data = NULL;
2060 struct smb_com_read_rsp *pSMBr;
2061 struct cifsFileInfo *open_file;
2062 struct cifs_io_parms io_parms;
2063 int buf_type = CIFS_NO_BUFFER;
2067 if (file->private_data == NULL) {
2072 open_file = file->private_data;
2073 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2074 pTcon = tlink_tcon(open_file->tlink);
2077 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2078 * immediately if the cookie is negative
2080 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
2085 cFYI(DBG2, "rpages: num pages %d", num_pages);
/* pid for the wire op: forwarded opener pid vs. current tgid */
2086 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2087 pid = open_file->pid;
2089 pid = current->tgid;
2091 for (i = 0; i < num_pages; ) {
2092 unsigned contig_pages;
2093 struct page *tmp_page;
2094 unsigned long expected_index;
2096 if (list_empty(page_list))
2099 page = list_entry(page_list->prev, struct page, lru);
2100 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2102 /* count adjacent pages that we will read into */
2105 list_entry(page_list->prev, struct page, lru)->index;
2106 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2107 if (tmp_page->index == expected_index) {
2113 if (contig_pages + i > num_pages)
2114 contig_pages = num_pages - i;
2116 /* for reads over a certain size could initiate async
2119 read_size = contig_pages * PAGE_CACHE_SIZE;
2120 /* Read size needs to be in multiples of one page */
2121 read_size = min_t(const unsigned int, read_size,
2122 cifs_sb->rsize & PAGE_CACHE_MASK);
2123 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2124 read_size, contig_pages);
2126 while (rc == -EAGAIN) {
2127 if (open_file->invalidHandle) {
2128 rc = cifs_reopen_file(open_file, true);
2132 io_parms.netfid = open_file->netfid;
2134 io_parms.tcon = pTcon;
2135 io_parms.offset = offset;
2136 io_parms.length = read_size;
2137 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
2138 &smb_read_data, &buf_type);
2139 /* BB more RC checks ? */
2140 if (rc == -EAGAIN) {
2141 if (smb_read_data) {
2142 if (buf_type == CIFS_SMALL_BUFFER)
2143 cifs_small_buf_release(smb_read_data);
2144 else if (buf_type == CIFS_LARGE_BUFFER)
2145 cifs_buf_release(smb_read_data);
2146 smb_read_data = NULL;
2150 if ((rc < 0) || (smb_read_data == NULL)) {
2151 cFYI(1, "Read error in readpages: %d", rc);
2153 } else if (bytes_read > 0) {
2154 task_io_account_read(bytes_read);
2155 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2156 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2157 smb_read_data + 4 /* RFC1001 hdr */ +
2158 le16_to_cpu(pSMBr->DataOffset));
2160 i += bytes_read >> PAGE_CACHE_SHIFT;
2161 cifs_stats_bytes_read(pTcon, bytes_read);
2162 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2163 i++; /* account for partial page */
2165 /* server copy of file can have smaller size
2167 /* BB do we need to verify this common case ?
2168 this case is ok - if we are at server EOF
2169 we will hit it on next read */
2174 cFYI(1, "No bytes read (%d) at offset %lld . "
2175 "Cleaning remaining pages from readahead list",
2176 bytes_read, offset);
2177 /* BB turn off caching and do new lookup on
2178 file size at server? */
2181 if (smb_read_data) {
2182 if (buf_type == CIFS_SMALL_BUFFER)
2183 cifs_small_buf_release(smb_read_data);
2184 else if (buf_type == CIFS_LARGE_BUFFER)
2185 cifs_buf_release(smb_read_data);
2186 smb_read_data = NULL;
2191 /* need to free smb_read_data buf before exit */
2192 if (smb_read_data) {
2193 if (buf_type == CIFS_SMALL_BUFFER)
2194 cifs_small_buf_release(smb_read_data);
2195 else if (buf_type == CIFS_LARGE_BUFFER)
2196 cifs_buf_release(smb_read_data);
2197 smb_read_data = NULL;
/*
 * Fill one page at *poffset: try FS-Cache first, else cifs_read() into
 * the kmapped page, zero the tail on a short read, mark the page
 * uptodate, and push it back to FS-Cache.  Also bumps atime on success.
 */
2205 static int cifs_readpage_worker(struct file *file, struct page *page,
2211 /* Is the page cached? */
2212 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2216 page_cache_get(page);
2217 read_data = kmap(page);
2218 /* for reads over a certain size could initiate async read ahead */
2220 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2225 cFYI(1, "Bytes read %d", rc);
2227 file->f_path.dentry->d_inode->i_atime =
2228 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* short read: zero the remainder so no stale data is exposed */
2230 if (PAGE_CACHE_SIZE > rc)
2231 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2233 flush_dcache_page(page);
2234 SetPageUptodate(page);
2236 /* send this page to the cache */
2237 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2243 page_cache_release(page);
/*
 * ->readpage a_op: compute the file offset from the page index and
 * delegate to cifs_readpage_worker().
 */
2249 static int cifs_readpage(struct file *file, struct page *page)
2251 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2257 if (file->private_data == NULL) {
2263 cFYI(1, "readpage %p at offset %d 0x%x\n",
2264 page, (int)offset, (int)offset);
2266 rc = cifs_readpage_worker(file, page, &offset);
/*
 * Return whether any open file on this inode has FMODE_WRITE.
 * Walks openFileList under cifs_file_list_lock.
 */
2274 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2276 struct cifsFileInfo *open_file;
2278 spin_lock(&cifs_file_list_lock);
2279 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2280 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2281 spin_unlock(&cifs_file_list_lock);
2285 spin_unlock(&cifs_file_list_lock);
2289 /* We do not want to update the file size from server for inodes
2290 open for write - to avoid races with writepage extending
2291 the file - in the future we could consider allowing
2292 refreshing the inode only on increases in the file size
2293 but this is tricky to do without racing with writebehind
2294 page caching in the current Linux kernel design */
2295 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2300 if (is_inode_writable(cifsInode)) {
2301 /* This inode is open for write at least once */
2302 struct cifs_sb_info *cifs_sb;
2304 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2305 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2306 /* since no page cache to corrupt on directio
2307 we can change size safely */
/* only shrinking relative to the server EOF is considered unsafe here */
2311 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * ->write_begin a_op: grab/lock the target page and decide whether it
 * must be read from the server first.  Skips the read when the page is
 * already uptodate, when the write covers a full page, or (with a read
 * oplock) when the page lies beyond EOF / the write covers all existing
 * data — in which case the untouched parts are zeroed and PageChecked
 * marks them as effectively uptodate for cifs_write_end().
 */
2319 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2320 loff_t pos, unsigned len, unsigned flags,
2321 struct page **pagep, void **fsdata)
2323 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2324 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2325 loff_t page_start = pos & PAGE_MASK;
2330 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2332 page = grab_cache_page_write_begin(mapping, index, flags);
2338 if (PageUptodate(page))
2342 * If we write a full page it will be up to date, no need to read from
2343 * the server. If the write is short, we'll end up doing a sync write
2346 if (len == PAGE_CACHE_SIZE)
2350 * optimize away the read when we have an oplock, and we're not
2351 * expecting to use any of the data we'd be reading in. That
2352 * is, when the page lies beyond the EOF, or straddles the EOF
2353 * and the write will cover all of the existing data.
2355 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2356 i_size = i_size_read(mapping->host);
2357 if (page_start >= i_size ||
2358 (offset == 0 && (pos + len) >= i_size)) {
2359 zero_user_segments(page, 0, offset,
2363 * PageChecked means that the parts of the page
2364 * to which we're not writing are considered up
2365 * to date. Once the data is copied to the
2366 * page, it can be set uptodate.
2368 SetPageChecked(page);
2373 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2375 * might as well read a page, it is fast enough. If we get
2376 * an error, we don't need to return it. cifs_write_end will
2377 * do a sync write instead since PG_uptodate isn't set.
2379 cifs_readpage_worker(file, page, &page_start);
2381 /* we could try using another file handle if there is one -
2382 but how would we lock it to prevent close of that handle
2383 racing with this read? In any case
2384 this will be written out by write_end so is fine */
/*
 * ->releasepage a_op: refuse release while the page has private data,
 * otherwise defer to FS-Cache's release decision.
 */
2391 static int cifs_release_page(struct page *page, gfp_t gfp)
2393 if (PagePrivate(page))
2396 return cifs_fscache_release_page(page, gfp);
/* ->invalidatepage a_op: propagate the invalidation to FS-Cache. */
2399 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2401 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2404 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * ->launder_page a_op: synchronously write a dirty page (WB_SYNC_ALL
 * over just this page's byte range) before it is invalidated, then
 * invalidate its FS-Cache copy.
 */
2407 static int cifs_launder_page(struct page *page)
2410 loff_t range_start = page_offset(page);
2411 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2412 struct writeback_control wbc = {
2413 .sync_mode = WB_SYNC_ALL,
2415 .range_start = range_start,
2416 .range_end = range_end,
2419 cFYI(1, "Launder page: %p", page);
2421 if (clear_page_dirty_for_io(page))
2422 rc = cifs_writepage_locked(page, &wbc);
2424 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * Workqueue handler for a server oplock break on @cfile: break any
 * lease, flush dirty data, and — when the read cache is being lost —
 * wait for writeback and invalidate the remote inode.  Finally sends
 * the oplock release to the server unless it was cancelled (e.g. the
 * session reconnected and the server already dropped it).
 */
2428 void cifs_oplock_break(struct work_struct *work)
2430 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2432 struct inode *inode = cfile->dentry->d_inode;
2433 struct cifsInodeInfo *cinode = CIFS_I(inode);
2436 if (inode && S_ISREG(inode->i_mode)) {
2437 if (cinode->clientCanCacheRead)
2438 break_lease(inode, O_RDONLY);
2440 break_lease(inode, O_WRONLY);
2441 rc = filemap_fdatawrite(inode->i_mapping);
2442 if (cinode->clientCanCacheRead == 0) {
2443 rc = filemap_fdatawait(inode->i_mapping);
2444 mapping_set_error(inode->i_mapping, rc);
2445 invalidate_remote_inode(inode);
2447 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2451 * releasing stale oplock after recent reconnect of smb session using
2452 * a now incorrect file handle is not a data integrity issue but do
2453 * not bother sending an oplock release if session to server still is
2454 * disconnected since oplock already released by the server
2456 if (!cfile->oplock_break_cancelled) {
2457 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
2458 current->tgid, 0, 0, 0, 0,
2459 LOCKING_ANDX_OPLOCK_RELEASE, false,
2460 cinode->clientCanCacheRead ? 1 : 0);
2461 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Default address-space operations, used when the server buffer is large
 * enough for full-page reads (cf. cifs_addr_ops_smallbuf, which omits
 * ->readpages).
 */
2465 const struct address_space_operations cifs_addr_ops = {
2466 .readpage = cifs_readpage,
2467 .readpages = cifs_readpages,
2468 .writepage = cifs_writepage,
2469 .writepages = cifs_writepages,
2470 .write_begin = cifs_write_begin,
2471 .write_end = cifs_write_end,
2472 .set_page_dirty = __set_page_dirty_nobuffers,
2473 .releasepage = cifs_release_page,
2474 .invalidatepage = cifs_invalidate_page,
2475 .launder_page = cifs_launder_page,
2479 * cifs_readpages requires the server to support a buffer large enough to
2480 * contain the header plus one complete page of data. Otherwise, we need
2481 * to leave cifs_readpages out of the address space operations.
2483 const struct address_space_operations cifs_addr_ops_smallbuf = {
2484 .readpage = cifs_readpage,
2485 .writepage = cifs_writepage,
2486 .writepages = cifs_writepages,
2487 .write_begin = cifs_write_begin,
2488 .write_end = cifs_write_end,
2489 .set_page_dirty = __set_page_dirty_nobuffers,
2490 .releasepage = cifs_release_page,
2491 .invalidatepage = cifs_invalidate_page,
2492 .launder_page = cifs_launder_page,