Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq
[sfrench/cifs-2.6.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
36 #include "cifsfs.h"
37 #include "cifspdu.h"
38 #include "cifsglob.h"
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
43 #include "fscache.h"
44
45 static inline int cifs_convert_flags(unsigned int flags)
46 {
47         if ((flags & O_ACCMODE) == O_RDONLY)
48                 return GENERIC_READ;
49         else if ((flags & O_ACCMODE) == O_WRONLY)
50                 return GENERIC_WRITE;
51         else if ((flags & O_ACCMODE) == O_RDWR) {
52                 /* GENERIC_ALL is too much permission to request
53                    can cause unnecessary access denied on create */
54                 /* return GENERIC_ALL; */
55                 return (GENERIC_READ | GENERIC_WRITE);
56         }
57
58         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
59                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
60                 FILE_READ_DATA);
61 }
62
63 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
64 {
65         fmode_t posix_flags = 0;
66
67         if ((flags & O_ACCMODE) == O_RDONLY)
68                 posix_flags = FMODE_READ;
69         else if ((flags & O_ACCMODE) == O_WRONLY)
70                 posix_flags = FMODE_WRITE;
71         else if ((flags & O_ACCMODE) == O_RDWR) {
72                 /* GENERIC_ALL is too much permission to request
73                    can cause unnecessary access denied on create */
74                 /* return GENERIC_ALL; */
75                 posix_flags = FMODE_READ | FMODE_WRITE;
76         }
77         /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
78            reopening a file.  They had their effect on the original open */
79         if (flags & O_APPEND)
80                 posix_flags |= (fmode_t)O_APPEND;
81         if (flags & O_DSYNC)
82                 posix_flags |= (fmode_t)O_DSYNC;
83         if (flags & __O_SYNC)
84                 posix_flags |= (fmode_t)__O_SYNC;
85         if (flags & O_DIRECTORY)
86                 posix_flags |= (fmode_t)O_DIRECTORY;
87         if (flags & O_NOFOLLOW)
88                 posix_flags |= (fmode_t)O_NOFOLLOW;
89         if (flags & O_DIRECT)
90                 posix_flags |= (fmode_t)O_DIRECT;
91
92         return posix_flags;
93 }
94
95 static inline int cifs_get_disposition(unsigned int flags)
96 {
97         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98                 return FILE_CREATE;
99         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
100                 return FILE_OVERWRITE_IF;
101         else if ((flags & O_CREAT) == O_CREAT)
102                 return FILE_OPEN_IF;
103         else if ((flags & O_TRUNC) == O_TRUNC)
104                 return FILE_OVERWRITE;
105         else
106                 return FILE_OPEN;
107 }
108
109 /* all arguments to this function must be checked for validity in caller */
110 static inline int
111 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
112                              struct cifsInodeInfo *pCifsInode, __u32 oplock,
113                              u16 netfid)
114 {
115
116         write_lock(&GlobalSMBSeslock);
117
118         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
119         if (pCifsInode == NULL) {
120                 write_unlock(&GlobalSMBSeslock);
121                 return -EINVAL;
122         }
123
124         if (pCifsInode->clientCanCacheRead) {
125                 /* we have the inode open somewhere else
126                    no need to discard cache data */
127                 goto psx_client_can_cache;
128         }
129
130         /* BB FIXME need to fix this check to move it earlier into posix_open
131            BB  fIX following section BB FIXME */
132
133         /* if not oplocked, invalidate inode pages if mtime or file
134            size changed */
135 /*      temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
136         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
137                            (file->f_path.dentry->d_inode->i_size ==
138                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
139                 cFYI(1, "inode unchanged on server");
140         } else {
141                 if (file->f_path.dentry->d_inode->i_mapping) {
142                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
143                         if (rc != 0)
144                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
145                 }
146                 cFYI(1, "invalidating remote inode since open detected it "
147                          "changed");
148                 invalidate_remote_inode(file->f_path.dentry->d_inode);
149         } */
150
151 psx_client_can_cache:
152         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
153                 pCifsInode->clientCanCacheAll = true;
154                 pCifsInode->clientCanCacheRead = true;
155                 cFYI(1, "Exclusive Oplock granted on inode %p",
156                          file->f_path.dentry->d_inode);
157         } else if ((oplock & 0xF) == OPLOCK_READ)
158                 pCifsInode->clientCanCacheRead = true;
159
160         /* will have to change the unlock if we reenable the
161            filemap_fdatawrite (which does not seem necessary */
162         write_unlock(&GlobalSMBSeslock);
163         return 0;
164 }
165
166 /* all arguments to this function must be checked for validity in caller */
167 static inline int cifs_open_inode_helper(struct inode *inode,
168         struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
169         char *full_path, int xid)
170 {
171         struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
172         struct timespec temp;
173         int rc;
174
175         if (pCifsInode->clientCanCacheRead) {
176                 /* we have the inode open somewhere else
177                    no need to discard cache data */
178                 goto client_can_cache;
179         }
180
181         /* BB need same check in cifs_create too? */
182         /* if not oplocked, invalidate inode pages if mtime or file
183            size changed */
184         temp = cifs_NTtimeToUnix(buf->LastWriteTime);
185         if (timespec_equal(&inode->i_mtime, &temp) &&
186                            (inode->i_size ==
187                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
188                 cFYI(1, "inode unchanged on server");
189         } else {
190                 if (inode->i_mapping) {
191                         /* BB no need to lock inode until after invalidate
192                         since namei code should already have it locked? */
193                         rc = filemap_write_and_wait(inode->i_mapping);
194                         if (rc != 0)
195                                 pCifsInode->write_behind_rc = rc;
196                 }
197                 cFYI(1, "invalidating remote inode since open detected it "
198                          "changed");
199                 invalidate_remote_inode(inode);
200         }
201
202 client_can_cache:
203         if (pTcon->unix_ext)
204                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
205                                               xid);
206         else
207                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
208                                          xid, NULL);
209
210         if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
211                 pCifsInode->clientCanCacheAll = true;
212                 pCifsInode->clientCanCacheRead = true;
213                 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
214         } else if ((*oplock & 0xF) == OPLOCK_READ)
215                 pCifsInode->clientCanCacheRead = true;
216
217         return rc;
218 }
219
220 int cifs_open(struct inode *inode, struct file *file)
221 {
222         int rc = -EACCES;
223         int xid;
224         __u32 oplock;
225         struct cifs_sb_info *cifs_sb;
226         struct cifsTconInfo *tcon;
227         struct cifsFileInfo *pCifsFile = NULL;
228         struct cifsInodeInfo *pCifsInode;
229         char *full_path = NULL;
230         int desiredAccess;
231         int disposition;
232         __u16 netfid;
233         FILE_ALL_INFO *buf = NULL;
234
235         xid = GetXid();
236
237         cifs_sb = CIFS_SB(inode->i_sb);
238         tcon = cifs_sb->tcon;
239
240         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
241
242         full_path = build_path_from_dentry(file->f_path.dentry);
243         if (full_path == NULL) {
244                 rc = -ENOMEM;
245                 FreeXid(xid);
246                 return rc;
247         }
248
249         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
250                  inode, file->f_flags, full_path);
251
252         if (oplockEnabled)
253                 oplock = REQ_OPLOCK;
254         else
255                 oplock = 0;
256
257         if (!tcon->broken_posix_open && tcon->unix_ext &&
258             (tcon->ses->capabilities & CAP_UNIX) &&
259             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
260                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
261                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
262                 oflags |= SMB_O_CREAT;
263                 /* can not refresh inode info since size could be stale */
264                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
265                                 cifs_sb->mnt_file_mode /* ignored */,
266                                 oflags, &oplock, &netfid, xid);
267                 if (rc == 0) {
268                         cFYI(1, "posix open succeeded");
269                         /* no need for special case handling of setting mode
270                            on read only files needed here */
271
272                         rc = cifs_posix_open_inode_helper(inode, file,
273                                         pCifsInode, oplock, netfid);
274                         if (rc != 0) {
275                                 CIFSSMBClose(xid, tcon, netfid);
276                                 goto out;
277                         }
278
279                         pCifsFile = cifs_new_fileinfo(inode, netfid, file,
280                                                         file->f_path.mnt,
281                                                         oflags);
282                         if (pCifsFile == NULL) {
283                                 CIFSSMBClose(xid, tcon, netfid);
284                                 rc = -ENOMEM;
285                         }
286
287                         cifs_fscache_set_inode_cookie(inode, file);
288
289                         goto out;
290                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
291                         if (tcon->ses->serverNOS)
292                                 cERROR(1, "server %s of type %s returned"
293                                            " unexpected error on SMB posix open"
294                                            ", disabling posix open support."
295                                            " Check if server update available.",
296                                            tcon->ses->serverName,
297                                            tcon->ses->serverNOS);
298                         tcon->broken_posix_open = true;
299                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
300                          (rc != -EOPNOTSUPP)) /* path not found or net err */
301                         goto out;
302                 /* else fallthrough to retry open the old way on network i/o
303                    or DFS errors */
304         }
305
306         desiredAccess = cifs_convert_flags(file->f_flags);
307
308 /*********************************************************************
309  *  open flag mapping table:
310  *
311  *      POSIX Flag            CIFS Disposition
312  *      ----------            ----------------
313  *      O_CREAT               FILE_OPEN_IF
314  *      O_CREAT | O_EXCL      FILE_CREATE
315  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
316  *      O_TRUNC               FILE_OVERWRITE
317  *      none of the above     FILE_OPEN
318  *
319  *      Note that there is not a direct match between disposition
320  *      FILE_SUPERSEDE (ie create whether or not file exists although
321  *      O_CREAT | O_TRUNC is similar but truncates the existing
322  *      file rather than creating a new file as FILE_SUPERSEDE does
323  *      (which uses the attributes / metadata passed in on open call)
324  *?
325  *?  O_SYNC is a reasonable match to CIFS writethrough flag
326  *?  and the read write flags match reasonably.  O_LARGEFILE
327  *?  is irrelevant because largefile support is always used
328  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
329  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
330  *********************************************************************/
331
332         disposition = cifs_get_disposition(file->f_flags);
333
334         /* BB pass O_SYNC flag through on file attributes .. BB */
335
336         /* Also refresh inode by passing in file_info buf returned by SMBOpen
337            and calling get_inode_info with returned buf (at least helps
338            non-Unix server case) */
339
340         /* BB we can not do this if this is the second open of a file
341            and the first handle has writebehind data, we might be
342            able to simply do a filemap_fdatawrite/filemap_fdatawait first */
343         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
344         if (!buf) {
345                 rc = -ENOMEM;
346                 goto out;
347         }
348
349         if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
350                 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
351                          desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
352                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
353                                  & CIFS_MOUNT_MAP_SPECIAL_CHR);
354         else
355                 rc = -EIO; /* no NT SMB support fall into legacy open below */
356
357         if (rc == -EIO) {
358                 /* Old server, try legacy style OpenX */
359                 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
360                         desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
361                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
362                                 & CIFS_MOUNT_MAP_SPECIAL_CHR);
363         }
364         if (rc) {
365                 cFYI(1, "cifs_open returned 0x%x", rc);
366                 goto out;
367         }
368
369         rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
370         if (rc != 0)
371                 goto out;
372
373         pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
374                                         file->f_flags);
375         if (pCifsFile == NULL) {
376                 rc = -ENOMEM;
377                 goto out;
378         }
379
380         cifs_fscache_set_inode_cookie(inode, file);
381
382         if (oplock & CIFS_CREATE_ACTION) {
383                 /* time to set mode which we can not set earlier due to
384                    problems creating new read-only files */
385                 if (tcon->unix_ext) {
386                         struct cifs_unix_set_info_args args = {
387                                 .mode   = inode->i_mode,
388                                 .uid    = NO_CHANGE_64,
389                                 .gid    = NO_CHANGE_64,
390                                 .ctime  = NO_CHANGE_64,
391                                 .atime  = NO_CHANGE_64,
392                                 .mtime  = NO_CHANGE_64,
393                                 .device = 0,
394                         };
395                         CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
396                                                cifs_sb->local_nls,
397                                                cifs_sb->mnt_cifs_flags &
398                                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
399                 }
400         }
401
402 out:
403         kfree(buf);
404         kfree(full_path);
405         FreeXid(xid);
406         return rc;
407 }
408
/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 *
 * Not implemented yet: nothing is relocked and 0 is always returned.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	/* BB list all locks open on this file and relock */

	return 0;
}
419
420 static int cifs_reopen_file(struct file *file, bool can_flush)
421 {
422         int rc = -EACCES;
423         int xid;
424         __u32 oplock;
425         struct cifs_sb_info *cifs_sb;
426         struct cifsTconInfo *tcon;
427         struct cifsFileInfo *pCifsFile;
428         struct cifsInodeInfo *pCifsInode;
429         struct inode *inode;
430         char *full_path = NULL;
431         int desiredAccess;
432         int disposition = FILE_OPEN;
433         __u16 netfid;
434
435         if (file->private_data)
436                 pCifsFile = file->private_data;
437         else
438                 return -EBADF;
439
440         xid = GetXid();
441         mutex_lock(&pCifsFile->fh_mutex);
442         if (!pCifsFile->invalidHandle) {
443                 mutex_unlock(&pCifsFile->fh_mutex);
444                 rc = 0;
445                 FreeXid(xid);
446                 return rc;
447         }
448
449         if (file->f_path.dentry == NULL) {
450                 cERROR(1, "no valid name if dentry freed");
451                 dump_stack();
452                 rc = -EBADF;
453                 goto reopen_error_exit;
454         }
455
456         inode = file->f_path.dentry->d_inode;
457         if (inode == NULL) {
458                 cERROR(1, "inode not valid");
459                 dump_stack();
460                 rc = -EBADF;
461                 goto reopen_error_exit;
462         }
463
464         cifs_sb = CIFS_SB(inode->i_sb);
465         tcon = cifs_sb->tcon;
466
467 /* can not grab rename sem here because various ops, including
468    those that already have the rename sem can end up causing writepage
469    to get called and if the server was down that means we end up here,
470    and we can never tell if the caller already has the rename_sem */
471         full_path = build_path_from_dentry(file->f_path.dentry);
472         if (full_path == NULL) {
473                 rc = -ENOMEM;
474 reopen_error_exit:
475                 mutex_unlock(&pCifsFile->fh_mutex);
476                 FreeXid(xid);
477                 return rc;
478         }
479
480         cFYI(1, "inode = 0x%p file flags 0x%x for %s",
481                  inode, file->f_flags, full_path);
482
483         if (oplockEnabled)
484                 oplock = REQ_OPLOCK;
485         else
486                 oplock = 0;
487
488         if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
489             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
490                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
491                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
492                 /* can not refresh inode info since size could be stale */
493                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
494                                 cifs_sb->mnt_file_mode /* ignored */,
495                                 oflags, &oplock, &netfid, xid);
496                 if (rc == 0) {
497                         cFYI(1, "posix reopen succeeded");
498                         goto reopen_success;
499                 }
500                 /* fallthrough to retry open the old way on errors, especially
501                    in the reconnect path it is important to retry hard */
502         }
503
504         desiredAccess = cifs_convert_flags(file->f_flags);
505
506         /* Can not refresh inode by passing in file_info buf to be returned
507            by SMBOpen and then calling get_inode_info with returned buf
508            since file might have write behind data that needs to be flushed
509            and server version of file size can be stale. If we knew for sure
510            that inode was not dirty locally we could do this */
511
512         rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
513                          CREATE_NOT_DIR, &netfid, &oplock, NULL,
514                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
515                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
516         if (rc) {
517                 mutex_unlock(&pCifsFile->fh_mutex);
518                 cFYI(1, "cifs_open returned 0x%x", rc);
519                 cFYI(1, "oplock: %d", oplock);
520         } else {
521 reopen_success:
522                 pCifsFile->netfid = netfid;
523                 pCifsFile->invalidHandle = false;
524                 mutex_unlock(&pCifsFile->fh_mutex);
525                 pCifsInode = CIFS_I(inode);
526                 if (pCifsInode) {
527                         if (can_flush) {
528                                 rc = filemap_write_and_wait(inode->i_mapping);
529                                 if (rc != 0)
530                                         CIFS_I(inode)->write_behind_rc = rc;
531                         /* temporarily disable caching while we
532                            go to server to get inode info */
533                                 pCifsInode->clientCanCacheAll = false;
534                                 pCifsInode->clientCanCacheRead = false;
535                                 if (tcon->unix_ext)
536                                         rc = cifs_get_inode_info_unix(&inode,
537                                                 full_path, inode->i_sb, xid);
538                                 else
539                                         rc = cifs_get_inode_info(&inode,
540                                                 full_path, NULL, inode->i_sb,
541                                                 xid, NULL);
542                         } /* else we are writing out data to server already
543                              and could deadlock if we tried to flush data, and
544                              since we do not know if we have data that would
545                              invalidate the current end of file on the server
546                              we can not go to the server to get the new inod
547                              info */
548                         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
549                                 pCifsInode->clientCanCacheAll = true;
550                                 pCifsInode->clientCanCacheRead = true;
551                                 cFYI(1, "Exclusive Oplock granted on inode %p",
552                                          file->f_path.dentry->d_inode);
553                         } else if ((oplock & 0xF) == OPLOCK_READ) {
554                                 pCifsInode->clientCanCacheRead = true;
555                                 pCifsInode->clientCanCacheAll = false;
556                         } else {
557                                 pCifsInode->clientCanCacheRead = false;
558                                 pCifsInode->clientCanCacheAll = false;
559                         }
560                         cifs_relock_file(pCifsFile);
561                 }
562         }
563         kfree(full_path);
564         FreeXid(xid);
565         return rc;
566 }
567
568 int cifs_close(struct inode *inode, struct file *file)
569 {
570         int rc = 0;
571         int xid, timeout;
572         struct cifs_sb_info *cifs_sb;
573         struct cifsTconInfo *pTcon;
574         struct cifsFileInfo *pSMBFile = file->private_data;
575
576         xid = GetXid();
577
578         cifs_sb = CIFS_SB(inode->i_sb);
579         pTcon = cifs_sb->tcon;
580         if (pSMBFile) {
581                 struct cifsLockInfo *li, *tmp;
582                 write_lock(&GlobalSMBSeslock);
583                 pSMBFile->closePend = true;
584                 if (pTcon) {
585                         /* no sense reconnecting to close a file that is
586                            already closed */
587                         if (!pTcon->need_reconnect) {
588                                 write_unlock(&GlobalSMBSeslock);
589                                 timeout = 2;
590                                 while ((atomic_read(&pSMBFile->count) != 1)
591                                         && (timeout <= 2048)) {
592                                         /* Give write a better chance to get to
593                                         server ahead of the close.  We do not
594                                         want to add a wait_q here as it would
595                                         increase the memory utilization as
596                                         the struct would be in each open file,
597                                         but this should give enough time to
598                                         clear the socket */
599                                         cFYI(DBG2, "close delay, write pending");
600                                         msleep(timeout);
601                                         timeout *= 4;
602                                 }
603                                 if (!pTcon->need_reconnect &&
604                                     !pSMBFile->invalidHandle)
605                                         rc = CIFSSMBClose(xid, pTcon,
606                                                   pSMBFile->netfid);
607                         } else
608                                 write_unlock(&GlobalSMBSeslock);
609                 } else
610                         write_unlock(&GlobalSMBSeslock);
611
612                 /* Delete any outstanding lock records.
613                    We'll lose them when the file is closed anyway. */
614                 mutex_lock(&pSMBFile->lock_mutex);
615                 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
616                         list_del(&li->llist);
617                         kfree(li);
618                 }
619                 mutex_unlock(&pSMBFile->lock_mutex);
620
621                 write_lock(&GlobalSMBSeslock);
622                 list_del(&pSMBFile->flist);
623                 list_del(&pSMBFile->tlist);
624                 write_unlock(&GlobalSMBSeslock);
625                 cifsFileInfo_put(file->private_data);
626                 file->private_data = NULL;
627         } else
628                 rc = -EBADF;
629
630         read_lock(&GlobalSMBSeslock);
631         if (list_empty(&(CIFS_I(inode)->openFileList))) {
632                 cFYI(1, "closing last open instance for inode %p", inode);
633                 /* if the file is not open we do not know if we can cache info
634                    on this inode, much less write behind and read ahead */
635                 CIFS_I(inode)->clientCanCacheRead = false;
636                 CIFS_I(inode)->clientCanCacheAll  = false;
637         }
638         read_unlock(&GlobalSMBSeslock);
639         if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
640                 rc = CIFS_I(inode)->write_behind_rc;
641         FreeXid(xid);
642         return rc;
643 }
644
645 int cifs_closedir(struct inode *inode, struct file *file)
646 {
647         int rc = 0;
648         int xid;
649         struct cifsFileInfo *pCFileStruct = file->private_data;
650         char *ptmp;
651
652         cFYI(1, "Closedir inode = 0x%p", inode);
653
654         xid = GetXid();
655
656         if (pCFileStruct) {
657                 struct cifsTconInfo *pTcon;
658                 struct cifs_sb_info *cifs_sb =
659                         CIFS_SB(file->f_path.dentry->d_sb);
660
661                 pTcon = cifs_sb->tcon;
662
663                 cFYI(1, "Freeing private data in close dir");
664                 write_lock(&GlobalSMBSeslock);
665                 if (!pCFileStruct->srch_inf.endOfSearch &&
666                     !pCFileStruct->invalidHandle) {
667                         pCFileStruct->invalidHandle = true;
668                         write_unlock(&GlobalSMBSeslock);
669                         rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
670                         cFYI(1, "Closing uncompleted readdir with rc %d",
671                                  rc);
672                         /* not much we can do if it fails anyway, ignore rc */
673                         rc = 0;
674                 } else
675                         write_unlock(&GlobalSMBSeslock);
676                 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
677                 if (ptmp) {
678                         cFYI(1, "closedir free smb buf in srch struct");
679                         pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
680                         if (pCFileStruct->srch_inf.smallBuf)
681                                 cifs_small_buf_release(ptmp);
682                         else
683                                 cifs_buf_release(ptmp);
684                 }
685                 kfree(file->private_data);
686                 file->private_data = NULL;
687         }
688         /* BB can we lock the filestruct while this is going on? */
689         FreeXid(xid);
690         return rc;
691 }
692
693 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
694                                 __u64 offset, __u8 lockType)
695 {
696         struct cifsLockInfo *li =
697                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
698         if (li == NULL)
699                 return -ENOMEM;
700         li->offset = offset;
701         li->length = len;
702         li->type = lockType;
703         mutex_lock(&fid->lock_mutex);
704         list_add(&li->llist, &fid->llist);
705         mutex_unlock(&fid->lock_mutex);
706         return 0;
707 }
708
709 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
710 {
711         int rc, xid;
712         __u32 numLock = 0;
713         __u32 numUnlock = 0;
714         __u64 length;
715         bool wait_flag = false;
716         struct cifs_sb_info *cifs_sb;
717         struct cifsTconInfo *tcon;
718         __u16 netfid;
719         __u8 lockType = LOCKING_ANDX_LARGE_FILES;
720         bool posix_locking = 0;
721
722         length = 1 + pfLock->fl_end - pfLock->fl_start;
723         rc = -EACCES;
724         xid = GetXid();
725
726         cFYI(1, "Lock parm: 0x%x flockflags: "
727                  "0x%x flocktype: 0x%x start: %lld end: %lld",
728                 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
729                 pfLock->fl_end);
730
731         if (pfLock->fl_flags & FL_POSIX)
732                 cFYI(1, "Posix");
733         if (pfLock->fl_flags & FL_FLOCK)
734                 cFYI(1, "Flock");
735         if (pfLock->fl_flags & FL_SLEEP) {
736                 cFYI(1, "Blocking lock");
737                 wait_flag = true;
738         }
739         if (pfLock->fl_flags & FL_ACCESS)
740                 cFYI(1, "Process suspended by mandatory locking - "
741                          "not implemented yet");
742         if (pfLock->fl_flags & FL_LEASE)
743                 cFYI(1, "Lease on file - not implemented yet");
744         if (pfLock->fl_flags &
745             (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
746                 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
747
748         if (pfLock->fl_type == F_WRLCK) {
749                 cFYI(1, "F_WRLCK ");
750                 numLock = 1;
751         } else if (pfLock->fl_type == F_UNLCK) {
752                 cFYI(1, "F_UNLCK");
753                 numUnlock = 1;
754                 /* Check if unlock includes more than
755                 one lock range */
756         } else if (pfLock->fl_type == F_RDLCK) {
757                 cFYI(1, "F_RDLCK");
758                 lockType |= LOCKING_ANDX_SHARED_LOCK;
759                 numLock = 1;
760         } else if (pfLock->fl_type == F_EXLCK) {
761                 cFYI(1, "F_EXLCK");
762                 numLock = 1;
763         } else if (pfLock->fl_type == F_SHLCK) {
764                 cFYI(1, "F_SHLCK");
765                 lockType |= LOCKING_ANDX_SHARED_LOCK;
766                 numLock = 1;
767         } else
768                 cFYI(1, "Unknown type of lock");
769
770         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
771         tcon = cifs_sb->tcon;
772
773         if (file->private_data == NULL) {
774                 rc = -EBADF;
775                 FreeXid(xid);
776                 return rc;
777         }
778         netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
779
780         if ((tcon->ses->capabilities & CAP_UNIX) &&
781             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
782             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
783                 posix_locking = 1;
784         /* BB add code here to normalize offset and length to
785         account for negative length which we can not accept over the
786         wire */
787         if (IS_GETLK(cmd)) {
788                 if (posix_locking) {
789                         int posix_lock_type;
790                         if (lockType & LOCKING_ANDX_SHARED_LOCK)
791                                 posix_lock_type = CIFS_RDLCK;
792                         else
793                                 posix_lock_type = CIFS_WRLCK;
794                         rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
795                                         length, pfLock,
796                                         posix_lock_type, wait_flag);
797                         FreeXid(xid);
798                         return rc;
799                 }
800
801                 /* BB we could chain these into one lock request BB */
802                 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
803                                  0, 1, lockType, 0 /* wait flag */ );
804                 if (rc == 0) {
805                         rc = CIFSSMBLock(xid, tcon, netfid, length,
806                                          pfLock->fl_start, 1 /* numUnlock */ ,
807                                          0 /* numLock */ , lockType,
808                                          0 /* wait flag */ );
809                         pfLock->fl_type = F_UNLCK;
810                         if (rc != 0)
811                                 cERROR(1, "Error unlocking previously locked "
812                                            "range %d during test of lock", rc);
813                         rc = 0;
814
815                 } else {
816                         /* if rc == ERR_SHARING_VIOLATION ? */
817                         rc = 0;
818
819                         if (lockType & LOCKING_ANDX_SHARED_LOCK) {
820                                 pfLock->fl_type = F_WRLCK;
821                         } else {
822                                 rc = CIFSSMBLock(xid, tcon, netfid, length,
823                                         pfLock->fl_start, 0, 1,
824                                         lockType | LOCKING_ANDX_SHARED_LOCK,
825                                         0 /* wait flag */);
826                                 if (rc == 0) {
827                                         rc = CIFSSMBLock(xid, tcon, netfid,
828                                                 length, pfLock->fl_start, 1, 0,
829                                                 lockType |
830                                                 LOCKING_ANDX_SHARED_LOCK,
831                                                 0 /* wait flag */);
832                                         pfLock->fl_type = F_RDLCK;
833                                         if (rc != 0)
834                                                 cERROR(1, "Error unlocking "
835                                                 "previously locked range %d "
836                                                 "during test of lock", rc);
837                                         rc = 0;
838                                 } else {
839                                         pfLock->fl_type = F_WRLCK;
840                                         rc = 0;
841                                 }
842                         }
843                 }
844
845                 FreeXid(xid);
846                 return rc;
847         }
848
849         if (!numLock && !numUnlock) {
850                 /* if no lock or unlock then nothing
851                 to do since we do not know what it is */
852                 FreeXid(xid);
853                 return -EOPNOTSUPP;
854         }
855
856         if (posix_locking) {
857                 int posix_lock_type;
858                 if (lockType & LOCKING_ANDX_SHARED_LOCK)
859                         posix_lock_type = CIFS_RDLCK;
860                 else
861                         posix_lock_type = CIFS_WRLCK;
862
863                 if (numUnlock == 1)
864                         posix_lock_type = CIFS_UNLCK;
865
866                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
867                                       length, pfLock,
868                                       posix_lock_type, wait_flag);
869         } else {
870                 struct cifsFileInfo *fid = file->private_data;
871
872                 if (numLock) {
873                         rc = CIFSSMBLock(xid, tcon, netfid, length,
874                                         pfLock->fl_start,
875                                         0, numLock, lockType, wait_flag);
876
877                         if (rc == 0) {
878                                 /* For Windows locks we must store them. */
879                                 rc = store_file_lock(fid, length,
880                                                 pfLock->fl_start, lockType);
881                         }
882                 } else if (numUnlock) {
883                         /* For each stored lock that this unlock overlaps
884                            completely, unlock it. */
885                         int stored_rc = 0;
886                         struct cifsLockInfo *li, *tmp;
887
888                         rc = 0;
889                         mutex_lock(&fid->lock_mutex);
890                         list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
891                                 if (pfLock->fl_start <= li->offset &&
892                                                 (pfLock->fl_start + length) >=
893                                                 (li->offset + li->length)) {
894                                         stored_rc = CIFSSMBLock(xid, tcon,
895                                                         netfid,
896                                                         li->length, li->offset,
897                                                         1, 0, li->type, false);
898                                         if (stored_rc)
899                                                 rc = stored_rc;
900                                         else {
901                                                 list_del(&li->llist);
902                                                 kfree(li);
903                                         }
904                                 }
905                         }
906                         mutex_unlock(&fid->lock_mutex);
907                 }
908         }
909
910         if (pfLock->fl_flags & FL_POSIX)
911                 posix_lock_file_wait(file, pfLock);
912         FreeXid(xid);
913         return rc;
914 }
915
916 /*
917  * Set the timeout on write requests past EOF. For some servers (Windows)
918  * these calls can be very long.
919  *
920  * If we're writing >10M past the EOF we give a 180s timeout. Anything less
921  * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
922  * The 10M cutoff is totally arbitrary. A better scheme for this would be
923  * welcome if someone wants to suggest one.
924  *
925  * We may be able to do a better job with this if there were some way to
926  * declare that a file should be sparse.
927  */
928 static int
929 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
930 {
931         if (offset <= cifsi->server_eof)
932                 return CIFS_STD_OP;
933         else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
934                 return CIFS_VLONG_OP;
935         else
936                 return CIFS_LONG_OP;
937 }
938
939 /* update the file size (if needed) after a write */
940 static void
941 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
942                       unsigned int bytes_written)
943 {
944         loff_t end_of_write = offset + bytes_written;
945
946         if (end_of_write > cifsi->server_eof)
947                 cifsi->server_eof = end_of_write;
948 }
949
950 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
951         size_t write_size, loff_t *poffset)
952 {
953         int rc = 0;
954         unsigned int bytes_written = 0;
955         unsigned int total_written;
956         struct cifs_sb_info *cifs_sb;
957         struct cifsTconInfo *pTcon;
958         int xid, long_op;
959         struct cifsFileInfo *open_file;
960         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
961
962         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
963
964         pTcon = cifs_sb->tcon;
965
966         /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
967            *poffset, file->f_path.dentry->d_name.name); */
968
969         if (file->private_data == NULL)
970                 return -EBADF;
971         open_file = file->private_data;
972
973         rc = generic_write_checks(file, poffset, &write_size, 0);
974         if (rc)
975                 return rc;
976
977         xid = GetXid();
978
979         long_op = cifs_write_timeout(cifsi, *poffset);
980         for (total_written = 0; write_size > total_written;
981              total_written += bytes_written) {
982                 rc = -EAGAIN;
983                 while (rc == -EAGAIN) {
984                         if (file->private_data == NULL) {
985                                 /* file has been closed on us */
986                                 FreeXid(xid);
987                         /* if we have gotten here we have written some data
988                            and blocked, and the file has been freed on us while
989                            we blocked so return what we managed to write */
990                                 return total_written;
991                         }
992                         if (open_file->closePend) {
993                                 FreeXid(xid);
994                                 if (total_written)
995                                         return total_written;
996                                 else
997                                         return -EBADF;
998                         }
999                         if (open_file->invalidHandle) {
1000                                 /* we could deadlock if we called
1001                                    filemap_fdatawait from here so tell
1002                                    reopen_file not to flush data to server
1003                                    now */
1004                                 rc = cifs_reopen_file(file, false);
1005                                 if (rc != 0)
1006                                         break;
1007                         }
1008
1009                         rc = CIFSSMBWrite(xid, pTcon,
1010                                 open_file->netfid,
1011                                 min_t(const int, cifs_sb->wsize,
1012                                       write_size - total_written),
1013                                 *poffset, &bytes_written,
1014                                 NULL, write_data + total_written, long_op);
1015                 }
1016                 if (rc || (bytes_written == 0)) {
1017                         if (total_written)
1018                                 break;
1019                         else {
1020                                 FreeXid(xid);
1021                                 return rc;
1022                         }
1023                 } else {
1024                         cifs_update_eof(cifsi, *poffset, bytes_written);
1025                         *poffset += bytes_written;
1026                 }
1027                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1028                                     15 seconds is plenty */
1029         }
1030
1031         cifs_stats_bytes_written(pTcon, total_written);
1032
1033         /* since the write may have blocked check these pointers again */
1034         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1035                 struct inode *inode = file->f_path.dentry->d_inode;
1036 /* Do not update local mtime - server will set its actual value on write
1037  *              inode->i_ctime = inode->i_mtime =
1038  *                      current_fs_time(inode->i_sb);*/
1039                 if (total_written > 0) {
1040                         spin_lock(&inode->i_lock);
1041                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1042                                 i_size_write(file->f_path.dentry->d_inode,
1043                                         *poffset);
1044                         spin_unlock(&inode->i_lock);
1045                 }
1046                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1047         }
1048         FreeXid(xid);
1049         return total_written;
1050 }
1051
1052 static ssize_t cifs_write(struct file *file, const char *write_data,
1053                           size_t write_size, loff_t *poffset)
1054 {
1055         int rc = 0;
1056         unsigned int bytes_written = 0;
1057         unsigned int total_written;
1058         struct cifs_sb_info *cifs_sb;
1059         struct cifsTconInfo *pTcon;
1060         int xid, long_op;
1061         struct cifsFileInfo *open_file;
1062         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1063
1064         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1065
1066         pTcon = cifs_sb->tcon;
1067
1068         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1069            *poffset, file->f_path.dentry->d_name.name);
1070
1071         if (file->private_data == NULL)
1072                 return -EBADF;
1073         open_file = file->private_data;
1074
1075         xid = GetXid();
1076
1077         long_op = cifs_write_timeout(cifsi, *poffset);
1078         for (total_written = 0; write_size > total_written;
1079              total_written += bytes_written) {
1080                 rc = -EAGAIN;
1081                 while (rc == -EAGAIN) {
1082                         if (file->private_data == NULL) {
1083                                 /* file has been closed on us */
1084                                 FreeXid(xid);
1085                         /* if we have gotten here we have written some data
1086                            and blocked, and the file has been freed on us
1087                            while we blocked so return what we managed to
1088                            write */
1089                                 return total_written;
1090                         }
1091                         if (open_file->closePend) {
1092                                 FreeXid(xid);
1093                                 if (total_written)
1094                                         return total_written;
1095                                 else
1096                                         return -EBADF;
1097                         }
1098                         if (open_file->invalidHandle) {
1099                                 /* we could deadlock if we called
1100                                    filemap_fdatawait from here so tell
1101                                    reopen_file not to flush data to
1102                                    server now */
1103                                 rc = cifs_reopen_file(file, false);
1104                                 if (rc != 0)
1105                                         break;
1106                         }
1107                         if (experimEnabled || (pTcon->ses->server &&
1108                                 ((pTcon->ses->server->secMode &
1109                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1110                                 == 0))) {
1111                                 struct kvec iov[2];
1112                                 unsigned int len;
1113
1114                                 len = min((size_t)cifs_sb->wsize,
1115                                           write_size - total_written);
1116                                 /* iov[0] is reserved for smb header */
1117                                 iov[1].iov_base = (char *)write_data +
1118                                                   total_written;
1119                                 iov[1].iov_len = len;
1120                                 rc = CIFSSMBWrite2(xid, pTcon,
1121                                                 open_file->netfid, len,
1122                                                 *poffset, &bytes_written,
1123                                                 iov, 1, long_op);
1124                         } else
1125                                 rc = CIFSSMBWrite(xid, pTcon,
1126                                          open_file->netfid,
1127                                          min_t(const int, cifs_sb->wsize,
1128                                                write_size - total_written),
1129                                          *poffset, &bytes_written,
1130                                          write_data + total_written,
1131                                          NULL, long_op);
1132                 }
1133                 if (rc || (bytes_written == 0)) {
1134                         if (total_written)
1135                                 break;
1136                         else {
1137                                 FreeXid(xid);
1138                                 return rc;
1139                         }
1140                 } else {
1141                         cifs_update_eof(cifsi, *poffset, bytes_written);
1142                         *poffset += bytes_written;
1143                 }
1144                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1145                                     15 seconds is plenty */
1146         }
1147
1148         cifs_stats_bytes_written(pTcon, total_written);
1149
1150         /* since the write may have blocked check these pointers again */
1151         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1152 /*BB We could make this contingent on superblock ATIME flag too */
1153 /*              file->f_path.dentry->d_inode->i_ctime =
1154                 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1155                 if (total_written > 0) {
1156                         spin_lock(&file->f_path.dentry->d_inode->i_lock);
1157                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1158                                 i_size_write(file->f_path.dentry->d_inode,
1159                                              *poffset);
1160                         spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1161                 }
1162                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1163         }
1164         FreeXid(xid);
1165         return total_written;
1166 }
1167
1168 #ifdef CONFIG_CIFS_EXPERIMENTAL
1169 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1170 {
1171         struct cifsFileInfo *open_file = NULL;
1172
1173         read_lock(&GlobalSMBSeslock);
1174         /* we could simply get the first_list_entry since write-only entries
1175            are always at the end of the list but since the first entry might
1176            have a close pending, we go through the whole list */
1177         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1178                 if (open_file->closePend)
1179                         continue;
1180                 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1181                     (open_file->pfile->f_flags & O_RDONLY))) {
1182                         if (!open_file->invalidHandle) {
1183                                 /* found a good file */
1184                                 /* lock it so it will not be closed on us */
1185                                 cifsFileInfo_get(open_file);
1186                                 read_unlock(&GlobalSMBSeslock);
1187                                 return open_file;
1188                         } /* else might as well continue, and look for
1189                              another, or simply have the caller reopen it
1190                              again rather than trying to fix this handle */
1191                 } else /* write only file */
1192                         break; /* write only files are last so must be done */
1193         }
1194         read_unlock(&GlobalSMBSeslock);
1195         return NULL;
1196 }
1197 #endif
1198
/*
 * Find an open handle on this inode that can be used for writing.
 *
 * The open file list is scanned under GlobalSMBSeslock, up to two times:
 * the first pass only accepts handles whose pid equals current->tgid, the
 * second pass (any_available) accepts any handle.  Entries with a close
 * pending are skipped, and only handles whose f_flags have O_RDWR or
 * O_WRONLY set are considered.
 *
 * A candidate gets a reference via cifsFileInfo_get().  If its handle is
 * invalid, the list lock is dropped and cifs_reopen_file() is tried; on
 * failure the reference is dropped and the scan moves on (or restarts if
 * a close became pending in the meantime).
 *
 * Returns the handle with the reference still held, or NULL if
 * cifs_inode is NULL or no usable handle was found.  The visible caller,
 * cifs_partialpagewrite(), releases it with cifsFileInfo_put().
 */
1199 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1200 {
1201         struct cifsFileInfo *open_file;
1202         bool any_available = false;
1203         int rc;
1204
1205         /* Having a null inode here (because mapping->host was set to zero by
1206         the VFS or MM) should not happen but we had reports of an oops (due to
1207         it being zero) during stress testcases so we need to check for it */
1208
1209         if (cifs_inode == NULL) {
1210                 cERROR(1, "Null inode passed to cifs_writeable_file");
1211                 dump_stack();
1212                 return NULL;
1213         }
1214
1215         read_lock(&GlobalSMBSeslock);
1216 refind_writable:
             /* every jump back here is made with GlobalSMBSeslock read-held */
1217         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                     /* first pass: skip handles opened by another process */
1218                 if (open_file->closePend ||
1219                     (!any_available && open_file->pid != current->tgid))
1220                         continue;
1221
1222                 if (open_file->pfile &&
1223                     ((open_file->pfile->f_flags & O_RDWR) ||
1224                      (open_file->pfile->f_flags & O_WRONLY))) {
                             /* take the reference before the lock can be dropped */
1225                         cifsFileInfo_get(open_file);
1226
1227                         if (!open_file->invalidHandle) {
1228                                 /* found a good writable file */
1229                                 read_unlock(&GlobalSMBSeslock);
1230                                 return open_file;
1231                         }
1232
1233                         read_unlock(&GlobalSMBSeslock);
1234                         /* Had to unlock since following call can block */
1235                         rc = cifs_reopen_file(open_file->pfile, false);
1236                         if (!rc) {
1237                                 if (!open_file->closePend)
1238                                         return open_file;
1239                                 else { /* start over in case this was deleted */
1240                                        /* since the list could be modified */
1241                                         read_lock(&GlobalSMBSeslock);
1242                                         cifsFileInfo_put(open_file);
1243                                         goto refind_writable;
1244                                 }
1245                         }
1246
1247                         /* if it fails, try another handle if possible -
1248                         (we can not do this if closePending since
1249                         loop could be modified - in which case we
1250                         have to start at the beginning of the list
1251                         again. Note that it would be bad
1252                         to hold up writepages here (rather than
1253                         in caller) with continuous retries */
1254                         cFYI(1, "wp failed on reopen file");
1255                         read_lock(&GlobalSMBSeslock);
1256                         /* can not use this handle, no write
1257                            pending on this one after all */
1258                         cifsFileInfo_put(open_file);
1259
                             /* NOTE(review): open_file is dereferenced here (and by the
                                list iterator on the next step) after our reference was
                                dropped above; if cifsFileInfo_put() can free the entry
                                this is a use-after-free - confirm against its definition */
1260                         if (open_file->closePend) /* list could have changed */
1261                                 goto refind_writable;
1262                         /* else we simply continue to the next entry. Thus
1263                            we do not loop on reopen errors.  If we
1264                            can not reopen the file, for example if we
1265                            reconnected to a server with another client
1266                            racing to delete or lock the file we would not
1267                            make progress if we restarted before the beginning
1268                            of the loop here. */
1269                 }
1270         }
1271         /* couldn't find useable FH with same pid, try any available */
1272         if (!any_available) {
1273                 any_available = true;
1274                 goto refind_writable;
1275         }
1276         read_unlock(&GlobalSMBSeslock);
1277         return NULL;
1278 }
1279
1280 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1281 {
1282         struct address_space *mapping = page->mapping;
1283         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1284         char *write_data;
1285         int rc = -EFAULT;
1286         int bytes_written = 0;
1287         struct cifs_sb_info *cifs_sb;
1288         struct cifsTconInfo *pTcon;
1289         struct inode *inode;
1290         struct cifsFileInfo *open_file;
1291
1292         if (!mapping || !mapping->host)
1293                 return -EFAULT;
1294
1295         inode = page->mapping->host;
1296         cifs_sb = CIFS_SB(inode->i_sb);
1297         pTcon = cifs_sb->tcon;
1298
1299         offset += (loff_t)from;
1300         write_data = kmap(page);
1301         write_data += from;
1302
1303         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1304                 kunmap(page);
1305                 return -EIO;
1306         }
1307
1308         /* racing with truncate? */
1309         if (offset > mapping->host->i_size) {
1310                 kunmap(page);
1311                 return 0; /* don't care */
1312         }
1313
1314         /* check to make sure that we are not extending the file */
1315         if (mapping->host->i_size - offset < (loff_t)to)
1316                 to = (unsigned)(mapping->host->i_size - offset);
1317
1318         open_file = find_writable_file(CIFS_I(mapping->host));
1319         if (open_file) {
1320                 bytes_written = cifs_write(open_file->pfile, write_data,
1321                                            to-from, &offset);
1322                 cifsFileInfo_put(open_file);
1323                 /* Does mm or vfs already set times? */
1324                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1325                 if ((bytes_written > 0) && (offset))
1326                         rc = 0;
1327                 else if (bytes_written < 0)
1328                         rc = bytes_written;
1329         } else {
1330                 cFYI(1, "No writeable filehandles for inode");
1331                 rc = -EIO;
1332         }
1333
1334         kunmap(page);
1335         return rc;
1336 }
1337
1338 static int cifs_writepages(struct address_space *mapping,
1339                            struct writeback_control *wbc)
1340 {
1341         struct backing_dev_info *bdi = mapping->backing_dev_info;
1342         unsigned int bytes_to_write;
1343         unsigned int bytes_written;
1344         struct cifs_sb_info *cifs_sb;
1345         int done = 0;
1346         pgoff_t end;
1347         pgoff_t index;
1348         int range_whole = 0;
1349         struct kvec *iov;
1350         int len;
1351         int n_iov = 0;
1352         pgoff_t next;
1353         int nr_pages;
1354         __u64 offset = 0;
1355         struct cifsFileInfo *open_file;
1356         struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1357         struct page *page;
1358         struct pagevec pvec;
1359         int rc = 0;
1360         int scanned = 0;
1361         int xid, long_op;
1362
1363         cifs_sb = CIFS_SB(mapping->host->i_sb);
1364
1365         /*
1366          * If wsize is smaller than the page cache size, default to writing
1367          * one page at a time via cifs_writepage
1368          */
1369         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1370                 return generic_writepages(mapping, wbc);
1371
1372         if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1373                 if (cifs_sb->tcon->ses->server->secMode &
1374                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1375                         if (!experimEnabled)
1376                                 return generic_writepages(mapping, wbc);
1377
1378         iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1379         if (iov == NULL)
1380                 return generic_writepages(mapping, wbc);
1381
1382
1383         /*
1384          * BB: Is this meaningful for a non-block-device file system?
1385          * If it is, we should test it again after we do I/O
1386          */
1387         if (wbc->nonblocking && bdi_write_congested(bdi)) {
1388                 wbc->encountered_congestion = 1;
1389                 kfree(iov);
1390                 return 0;
1391         }
1392
1393         xid = GetXid();
1394
1395         pagevec_init(&pvec, 0);
1396         if (wbc->range_cyclic) {
1397                 index = mapping->writeback_index; /* Start from prev offset */
1398                 end = -1;
1399         } else {
1400                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1401                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1402                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1403                         range_whole = 1;
1404                 scanned = 1;
1405         }
1406 retry:
1407         while (!done && (index <= end) &&
1408                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1409                         PAGECACHE_TAG_DIRTY,
1410                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1411                 int first;
1412                 unsigned int i;
1413
1414                 first = -1;
1415                 next = 0;
1416                 n_iov = 0;
1417                 bytes_to_write = 0;
1418
1419                 for (i = 0; i < nr_pages; i++) {
1420                         page = pvec.pages[i];
1421                         /*
1422                          * At this point we hold neither mapping->tree_lock nor
1423                          * lock on the page itself: the page may be truncated or
1424                          * invalidated (changing page->mapping to NULL), or even
1425                          * swizzled back from swapper_space to tmpfs file
1426                          * mapping
1427                          */
1428
1429                         if (first < 0)
1430                                 lock_page(page);
1431                         else if (!trylock_page(page))
1432                                 break;
1433
1434                         if (unlikely(page->mapping != mapping)) {
1435                                 unlock_page(page);
1436                                 break;
1437                         }
1438
1439                         if (!wbc->range_cyclic && page->index > end) {
1440                                 done = 1;
1441                                 unlock_page(page);
1442                                 break;
1443                         }
1444
1445                         if (next && (page->index != next)) {
1446                                 /* Not next consecutive page */
1447                                 unlock_page(page);
1448                                 break;
1449                         }
1450
1451                         if (wbc->sync_mode != WB_SYNC_NONE)
1452                                 wait_on_page_writeback(page);
1453
1454                         if (PageWriteback(page) ||
1455                                         !clear_page_dirty_for_io(page)) {
1456                                 unlock_page(page);
1457                                 break;
1458                         }
1459
1460                         /*
1461                          * This actually clears the dirty bit in the radix tree.
1462                          * See cifs_writepage() for more commentary.
1463                          */
1464                         set_page_writeback(page);
1465
1466                         if (page_offset(page) >= mapping->host->i_size) {
1467                                 done = 1;
1468                                 unlock_page(page);
1469                                 end_page_writeback(page);
1470                                 break;
1471                         }
1472
1473                         /*
1474                          * BB can we get rid of this?  pages are held by pvec
1475                          */
1476                         page_cache_get(page);
1477
1478                         len = min(mapping->host->i_size - page_offset(page),
1479                                   (loff_t)PAGE_CACHE_SIZE);
1480
1481                         /* reserve iov[0] for the smb header */
1482                         n_iov++;
1483                         iov[n_iov].iov_base = kmap(page);
1484                         iov[n_iov].iov_len = len;
1485                         bytes_to_write += len;
1486
1487                         if (first < 0) {
1488                                 first = i;
1489                                 offset = page_offset(page);
1490                         }
1491                         next = page->index + 1;
1492                         if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1493                                 break;
1494                 }
1495                 if (n_iov) {
1496                         /* Search for a writable handle every time we call
1497                          * CIFSSMBWrite2.  We can't rely on the last handle
1498                          * we used to still be valid
1499                          */
1500                         open_file = find_writable_file(CIFS_I(mapping->host));
1501                         if (!open_file) {
1502                                 cERROR(1, "No writable handles for inode");
1503                                 rc = -EBADF;
1504                         } else {
1505                                 long_op = cifs_write_timeout(cifsi, offset);
1506                                 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1507                                                    open_file->netfid,
1508                                                    bytes_to_write, offset,
1509                                                    &bytes_written, iov, n_iov,
1510                                                    long_op);
1511                                 cifsFileInfo_put(open_file);
1512                                 cifs_update_eof(cifsi, offset, bytes_written);
1513
1514                                 if (rc || bytes_written < bytes_to_write) {
1515                                         cERROR(1, "Write2 ret %d, wrote %d",
1516                                                   rc, bytes_written);
1517                                         /* BB what if continued retry is
1518                                            requested via mount flags? */
1519                                         if (rc == -ENOSPC)
1520                                                 set_bit(AS_ENOSPC, &mapping->flags);
1521                                         else
1522                                                 set_bit(AS_EIO, &mapping->flags);
1523                                 } else {
1524                                         cifs_stats_bytes_written(cifs_sb->tcon,
1525                                                                  bytes_written);
1526                                 }
1527                         }
1528                         for (i = 0; i < n_iov; i++) {
1529                                 page = pvec.pages[first + i];
1530                                 /* Should we also set page error on
1531                                 success rc but too little data written? */
1532                                 /* BB investigate retry logic on temporary
1533                                 server crash cases and how recovery works
1534                                 when page marked as error */
1535                                 if (rc)
1536                                         SetPageError(page);
1537                                 kunmap(page);
1538                                 unlock_page(page);
1539                                 end_page_writeback(page);
1540                                 page_cache_release(page);
1541                         }
1542                         if ((wbc->nr_to_write -= n_iov) <= 0)
1543                                 done = 1;
1544                         index = next;
1545                 } else
1546                         /* Need to re-find the pages we skipped */
1547                         index = pvec.pages[0]->index + 1;
1548
1549                 pagevec_release(&pvec);
1550         }
1551         if (!scanned && !done) {
1552                 /*
1553                  * We hit the last page and there is more work to be done: wrap
1554                  * back to the start of the file
1555                  */
1556                 scanned = 1;
1557                 index = 0;
1558                 goto retry;
1559         }
1560         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1561                 mapping->writeback_index = index;
1562
1563         FreeXid(xid);
1564         kfree(iov);
1565         return rc;
1566 }
1567
1568 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1569 {
1570         int rc = -EFAULT;
1571         int xid;
1572
1573         xid = GetXid();
1574 /* BB add check for wbc flags */
1575         page_cache_get(page);
1576         if (!PageUptodate(page))
1577                 cFYI(1, "ppw - page not up to date");
1578
1579         /*
1580          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1581          *
1582          * A writepage() implementation always needs to do either this,
1583          * or re-dirty the page with "redirty_page_for_writepage()" in
1584          * the case of a failure.
1585          *
1586          * Just unlocking the page will cause the radix tree tag-bits
1587          * to fail to update with the state of the page correctly.
1588          */
1589         set_page_writeback(page);
1590         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1591         SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1592         unlock_page(page);
1593         end_page_writeback(page);
1594         page_cache_release(page);
1595         FreeXid(xid);
1596         return rc;
1597 }
1598
1599 static int cifs_write_end(struct file *file, struct address_space *mapping,
1600                         loff_t pos, unsigned len, unsigned copied,
1601                         struct page *page, void *fsdata)
1602 {
1603         int rc;
1604         struct inode *inode = mapping->host;
1605
1606         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1607                  page, pos, copied);
1608
1609         if (PageChecked(page)) {
1610                 if (copied == len)
1611                         SetPageUptodate(page);
1612                 ClearPageChecked(page);
1613         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1614                 SetPageUptodate(page);
1615
1616         if (!PageUptodate(page)) {
1617                 char *page_data;
1618                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1619                 int xid;
1620
1621                 xid = GetXid();
1622                 /* this is probably better than directly calling
1623                    partialpage_write since in this function the file handle is
1624                    known which we might as well leverage */
1625                 /* BB check if anything else missing out of ppw
1626                    such as updating last write time */
1627                 page_data = kmap(page);
1628                 rc = cifs_write(file, page_data + offset, copied, &pos);
1629                 /* if (rc < 0) should we set writebehind rc? */
1630                 kunmap(page);
1631
1632                 FreeXid(xid);
1633         } else {
1634                 rc = copied;
1635                 pos += copied;
1636                 set_page_dirty(page);
1637         }
1638
1639         if (rc > 0) {
1640                 spin_lock(&inode->i_lock);
1641                 if (pos > inode->i_size)
1642                         i_size_write(inode, pos);
1643                 spin_unlock(&inode->i_lock);
1644         }
1645
1646         unlock_page(page);
1647         page_cache_release(page);
1648
1649         return rc;
1650 }
1651
1652 int cifs_fsync(struct file *file, int datasync)
1653 {
1654         int xid;
1655         int rc = 0;
1656         struct cifsTconInfo *tcon;
1657         struct cifsFileInfo *smbfile = file->private_data;
1658         struct inode *inode = file->f_path.dentry->d_inode;
1659
1660         xid = GetXid();
1661
1662         cFYI(1, "Sync file - name: %s datasync: 0x%x",
1663                 file->f_path.dentry->d_name.name, datasync);
1664
1665         rc = filemap_write_and_wait(inode->i_mapping);
1666         if (rc == 0) {
1667                 rc = CIFS_I(inode)->write_behind_rc;
1668                 CIFS_I(inode)->write_behind_rc = 0;
1669                 tcon = CIFS_SB(inode->i_sb)->tcon;
1670                 if (!rc && tcon && smbfile &&
1671                    !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1672                         rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1673         }
1674
1675         FreeXid(xid);
1676         return rc;
1677 }
1678
1679 /* static void cifs_sync_page(struct page *page)
1680 {
1681         struct address_space *mapping;
1682         struct inode *inode;
1683         unsigned long index = page->index;
1684         unsigned int rpages = 0;
1685         int rc = 0;
1686
1687         cFYI(1, "sync page %p", page);
1688         mapping = page->mapping;
1689         if (!mapping)
1690                 return 0;
1691         inode = mapping->host;
1692         if (!inode)
1693                 return; */
1694
1695 /*      fill in rpages then
1696         result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1697
1698 /*      cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1699
1700 #if 0
1701         if (rc < 0)
1702                 return rc;
1703         return 0;
1704 #endif
1705 } */
1706
1707 /*
1708  * As file closes, flush all cached write data for this inode checking
1709  * for write behind errors.
1710  */
1711 int cifs_flush(struct file *file, fl_owner_t id)
1712 {
1713         struct inode *inode = file->f_path.dentry->d_inode;
1714         int rc = 0;
1715
1716         /* Rather than do the steps manually:
1717            lock the inode for writing
1718            loop through pages looking for write behind data (dirty pages)
1719            coalesce into contiguous 16K (or smaller) chunks to write to server
1720            send to server (prefer in parallel)
1721            deal with writebehind errors
1722            unlock inode for writing
1723            filemapfdatawrite appears easier for the time being */
1724
1725         rc = filemap_fdatawrite(inode->i_mapping);
1726         /* reset wb rc if we were able to write out dirty pages */
1727         if (!rc) {
1728                 rc = CIFS_I(inode)->write_behind_rc;
1729                 CIFS_I(inode)->write_behind_rc = 0;
1730         }
1731
1732         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1733
1734         return rc;
1735 }
1736
1737 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1738         size_t read_size, loff_t *poffset)
1739 {
1740         int rc = -EACCES;
1741         unsigned int bytes_read = 0;
1742         unsigned int total_read = 0;
1743         unsigned int current_read_size;
1744         struct cifs_sb_info *cifs_sb;
1745         struct cifsTconInfo *pTcon;
1746         int xid;
1747         struct cifsFileInfo *open_file;
1748         char *smb_read_data;
1749         char __user *current_offset;
1750         struct smb_com_read_rsp *pSMBr;
1751
1752         xid = GetXid();
1753         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1754         pTcon = cifs_sb->tcon;
1755
1756         if (file->private_data == NULL) {
1757                 rc = -EBADF;
1758                 FreeXid(xid);
1759                 return rc;
1760         }
1761         open_file = file->private_data;
1762
1763         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1764                 cFYI(1, "attempting read on write only file instance");
1765
1766         for (total_read = 0, current_offset = read_data;
1767              read_size > total_read;
1768              total_read += bytes_read, current_offset += bytes_read) {
1769                 current_read_size = min_t(const int, read_size - total_read,
1770                                           cifs_sb->rsize);
1771                 rc = -EAGAIN;
1772                 smb_read_data = NULL;
1773                 while (rc == -EAGAIN) {
1774                         int buf_type = CIFS_NO_BUFFER;
1775                         if ((open_file->invalidHandle) &&
1776                             (!open_file->closePend)) {
1777                                 rc = cifs_reopen_file(file, true);
1778                                 if (rc != 0)
1779                                         break;
1780                         }
1781                         rc = CIFSSMBRead(xid, pTcon,
1782                                          open_file->netfid,
1783                                          current_read_size, *poffset,
1784                                          &bytes_read, &smb_read_data,
1785                                          &buf_type);
1786                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1787                         if (smb_read_data) {
1788                                 if (copy_to_user(current_offset,
1789                                                 smb_read_data +
1790                                                 4 /* RFC1001 length field */ +
1791                                                 le16_to_cpu(pSMBr->DataOffset),
1792                                                 bytes_read))
1793                                         rc = -EFAULT;
1794
1795                                 if (buf_type == CIFS_SMALL_BUFFER)
1796                                         cifs_small_buf_release(smb_read_data);
1797                                 else if (buf_type == CIFS_LARGE_BUFFER)
1798                                         cifs_buf_release(smb_read_data);
1799                                 smb_read_data = NULL;
1800                         }
1801                 }
1802                 if (rc || (bytes_read == 0)) {
1803                         if (total_read) {
1804                                 break;
1805                         } else {
1806                                 FreeXid(xid);
1807                                 return rc;
1808                         }
1809                 } else {
1810                         cifs_stats_bytes_read(pTcon, bytes_read);
1811                         *poffset += bytes_read;
1812                 }
1813         }
1814         FreeXid(xid);
1815         return total_read;
1816 }
1817
1818
1819 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1820         loff_t *poffset)
1821 {
1822         int rc = -EACCES;
1823         unsigned int bytes_read = 0;
1824         unsigned int total_read;
1825         unsigned int current_read_size;
1826         struct cifs_sb_info *cifs_sb;
1827         struct cifsTconInfo *pTcon;
1828         int xid;
1829         char *current_offset;
1830         struct cifsFileInfo *open_file;
1831         int buf_type = CIFS_NO_BUFFER;
1832
1833         xid = GetXid();
1834         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1835         pTcon = cifs_sb->tcon;
1836
1837         if (file->private_data == NULL) {
1838                 rc = -EBADF;
1839                 FreeXid(xid);
1840                 return rc;
1841         }
1842         open_file = file->private_data;
1843
1844         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1845                 cFYI(1, "attempting read on write only file instance");
1846
1847         for (total_read = 0, current_offset = read_data;
1848              read_size > total_read;
1849              total_read += bytes_read, current_offset += bytes_read) {
1850                 current_read_size = min_t(const int, read_size - total_read,
1851                                           cifs_sb->rsize);
1852                 /* For windows me and 9x we do not want to request more
1853                 than it negotiated since it will refuse the read then */
1854                 if ((pTcon->ses) &&
1855                         !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1856                         current_read_size = min_t(const int, current_read_size,
1857                                         pTcon->ses->server->maxBuf - 128);
1858                 }
1859                 rc = -EAGAIN;
1860                 while (rc == -EAGAIN) {
1861                         if ((open_file->invalidHandle) &&
1862                             (!open_file->closePend)) {
1863                                 rc = cifs_reopen_file(file, true);
1864                                 if (rc != 0)
1865                                         break;
1866                         }
1867                         rc = CIFSSMBRead(xid, pTcon,
1868                                          open_file->netfid,
1869                                          current_read_size, *poffset,
1870                                          &bytes_read, &current_offset,
1871                                          &buf_type);
1872                 }
1873                 if (rc || (bytes_read == 0)) {
1874                         if (total_read) {
1875                                 break;
1876                         } else {
1877                                 FreeXid(xid);
1878                                 return rc;
1879                         }
1880                 } else {
1881                         cifs_stats_bytes_read(pTcon, total_read);
1882                         *poffset += bytes_read;
1883                 }
1884         }
1885         FreeXid(xid);
1886         return total_read;
1887 }
1888
/*
 * mmap a CIFS file: revalidate the file first, and only if that succeeds
 * fall through to generic_file_mmap().
 *
 * Returns 0 on success, or the error from revalidation or from
 * generic_file_mmap().
 */
int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        int rc, xid;

        xid = GetXid();

        rc = cifs_revalidate_file(file);
        if (rc == 0)
                rc = generic_file_mmap(file, vma);
        else
                cFYI(1, "Validation prior to mmap failed, error=%d", rc);

        FreeXid(xid);
        return rc;
}
1904
1905
1906 static void cifs_copy_cache_pages(struct address_space *mapping,
1907         struct list_head *pages, int bytes_read, char *data)
1908 {
1909         struct page *page;
1910         char *target;
1911
1912         while (bytes_read > 0) {
1913                 if (list_empty(pages))
1914                         break;
1915
1916                 page = list_entry(pages->prev, struct page, lru);
1917                 list_del(&page->lru);
1918
1919                 if (add_to_page_cache_lru(page, mapping, page->index,
1920                                       GFP_KERNEL)) {
1921                         page_cache_release(page);
1922                         cFYI(1, "Add page cache failed");
1923                         data += PAGE_CACHE_SIZE;
1924                         bytes_read -= PAGE_CACHE_SIZE;
1925                         continue;
1926                 }
1927                 page_cache_release(page);
1928
1929                 target = kmap_atomic(page, KM_USER0);
1930
1931                 if (PAGE_CACHE_SIZE > bytes_read) {
1932                         memcpy(target, data, bytes_read);
1933                         /* zero the tail end of this partial page */
1934                         memset(target + bytes_read, 0,
1935                                PAGE_CACHE_SIZE - bytes_read);
1936                         bytes_read = 0;
1937                 } else {
1938                         memcpy(target, data, PAGE_CACHE_SIZE);
1939                         bytes_read -= PAGE_CACHE_SIZE;
1940                 }
1941                 kunmap_atomic(target, KM_USER0);
1942
1943                 flush_dcache_page(page);
1944                 SetPageUptodate(page);
1945                 unlock_page(page);
1946                 data += PAGE_CACHE_SIZE;
1947
1948                 /* add page to FS-Cache */
1949                 cifs_readpage_to_fscache(mapping->host, page);
1950         }
1951         return;
1952 }
1953
1954 static int cifs_readpages(struct file *file, struct address_space *mapping,
1955         struct list_head *page_list, unsigned num_pages)
1956 {
1957         int rc = -EACCES;
1958         int xid;
1959         loff_t offset;
1960         struct page *page;
1961         struct cifs_sb_info *cifs_sb;
1962         struct cifsTconInfo *pTcon;
1963         unsigned int bytes_read = 0;
1964         unsigned int read_size, i;
1965         char *smb_read_data = NULL;
1966         struct smb_com_read_rsp *pSMBr;
1967         struct cifsFileInfo *open_file;
1968         int buf_type = CIFS_NO_BUFFER;
1969
1970         xid = GetXid();
1971         if (file->private_data == NULL) {
1972                 rc = -EBADF;
1973                 FreeXid(xid);
1974                 return rc;
1975         }
1976         open_file = file->private_data;
1977         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1978         pTcon = cifs_sb->tcon;
1979
1980         /*
1981          * Reads as many pages as possible from fscache. Returns -ENOBUFS
1982          * immediately if the cookie is negative
1983          */
1984         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
1985                                          &num_pages);
1986         if (rc == 0)
1987                 goto read_complete;
1988
1989         cFYI(DBG2, "rpages: num pages %d", num_pages);
1990         for (i = 0; i < num_pages; ) {
1991                 unsigned contig_pages;
1992                 struct page *tmp_page;
1993                 unsigned long expected_index;
1994
1995                 if (list_empty(page_list))
1996                         break;
1997
1998                 page = list_entry(page_list->prev, struct page, lru);
1999                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2000
2001                 /* count adjacent pages that we will read into */
2002                 contig_pages = 0;
2003                 expected_index =
2004                         list_entry(page_list->prev, struct page, lru)->index;
2005                 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2006                         if (tmp_page->index == expected_index) {
2007                                 contig_pages++;
2008                                 expected_index++;
2009                         } else
2010                                 break;
2011                 }
2012                 if (contig_pages + i >  num_pages)
2013                         contig_pages = num_pages - i;
2014
2015                 /* for reads over a certain size could initiate async
2016                    read ahead */
2017
2018                 read_size = contig_pages * PAGE_CACHE_SIZE;
2019                 /* Read size needs to be in multiples of one page */
2020                 read_size = min_t(const unsigned int, read_size,
2021                                   cifs_sb->rsize & PAGE_CACHE_MASK);
2022                 cFYI(DBG2, "rpages: read size 0x%x  contiguous pages %d",
2023                                 read_size, contig_pages);
2024                 rc = -EAGAIN;
2025                 while (rc == -EAGAIN) {
2026                         if ((open_file->invalidHandle) &&
2027                             (!open_file->closePend)) {
2028                                 rc = cifs_reopen_file(file, true);
2029                                 if (rc != 0)
2030                                         break;
2031                         }
2032
2033                         rc = CIFSSMBRead(xid, pTcon,
2034                                          open_file->netfid,
2035                                          read_size, offset,
2036                                          &bytes_read, &smb_read_data,
2037                                          &buf_type);
2038                         /* BB more RC checks ? */
2039                         if (rc == -EAGAIN) {
2040                                 if (smb_read_data) {
2041                                         if (buf_type == CIFS_SMALL_BUFFER)
2042                                                 cifs_small_buf_release(smb_read_data);
2043                                         else if (buf_type == CIFS_LARGE_BUFFER)
2044                                                 cifs_buf_release(smb_read_data);
2045                                         smb_read_data = NULL;
2046                                 }
2047                         }
2048                 }
2049                 if ((rc < 0) || (smb_read_data == NULL)) {
2050                         cFYI(1, "Read error in readpages: %d", rc);
2051                         break;
2052                 } else if (bytes_read > 0) {
2053                         task_io_account_read(bytes_read);
2054                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2055                         cifs_copy_cache_pages(mapping, page_list, bytes_read,
2056                                 smb_read_data + 4 /* RFC1001 hdr */ +
2057                                 le16_to_cpu(pSMBr->DataOffset));
2058
2059                         i +=  bytes_read >> PAGE_CACHE_SHIFT;
2060                         cifs_stats_bytes_read(pTcon, bytes_read);
2061                         if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2062                                 i++; /* account for partial page */
2063
2064                                 /* server copy of file can have smaller size
2065                                    than client */
2066                                 /* BB do we need to verify this common case ?
2067                                    this case is ok - if we are at server EOF
2068                                    we will hit it on next read */
2069
2070                                 /* break; */
2071                         }
2072                 } else {
2073                         cFYI(1, "No bytes read (%d) at offset %lld . "
2074                                 "Cleaning remaining pages from readahead list",
2075                                 bytes_read, offset);
2076                         /* BB turn off caching and do new lookup on
2077                            file size at server? */
2078                         break;
2079                 }
2080                 if (smb_read_data) {
2081                         if (buf_type == CIFS_SMALL_BUFFER)
2082                                 cifs_small_buf_release(smb_read_data);
2083                         else if (buf_type == CIFS_LARGE_BUFFER)
2084                                 cifs_buf_release(smb_read_data);
2085                         smb_read_data = NULL;
2086                 }
2087                 bytes_read = 0;
2088         }
2089
2090 /* need to free smb_read_data buf before exit */
2091         if (smb_read_data) {
2092                 if (buf_type == CIFS_SMALL_BUFFER)
2093                         cifs_small_buf_release(smb_read_data);
2094                 else if (buf_type == CIFS_LARGE_BUFFER)
2095                         cifs_buf_release(smb_read_data);
2096                 smb_read_data = NULL;
2097         }
2098
2099 read_complete:
2100         FreeXid(xid);
2101         return rc;
2102 }
2103
2104 static int cifs_readpage_worker(struct file *file, struct page *page,
2105         loff_t *poffset)
2106 {
2107         char *read_data;
2108         int rc;
2109
2110         /* Is the page cached? */
2111         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2112         if (rc == 0)
2113                 goto read_complete;
2114
2115         page_cache_get(page);
2116         read_data = kmap(page);
2117         /* for reads over a certain size could initiate async read ahead */
2118
2119         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2120
2121         if (rc < 0)
2122                 goto io_error;
2123         else
2124                 cFYI(1, "Bytes read %d", rc);
2125
2126         file->f_path.dentry->d_inode->i_atime =
2127                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2128
2129         if (PAGE_CACHE_SIZE > rc)
2130                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2131
2132         flush_dcache_page(page);
2133         SetPageUptodate(page);
2134
2135         /* send this page to the cache */
2136         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2137
2138         rc = 0;
2139
2140 io_error:
2141         kunmap(page);
2142         page_cache_release(page);
2143
2144 read_complete:
2145         return rc;
2146 }
2147
2148 static int cifs_readpage(struct file *file, struct page *page)
2149 {
2150         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2151         int rc = -EACCES;
2152         int xid;
2153
2154         xid = GetXid();
2155
2156         if (file->private_data == NULL) {
2157                 rc = -EBADF;
2158                 FreeXid(xid);
2159                 return rc;
2160         }
2161
2162         cFYI(1, "readpage %p at offset %d 0x%x\n",
2163                  page, (int)offset, (int)offset);
2164
2165         rc = cifs_readpage_worker(file, page, &offset);
2166
2167         unlock_page(page);
2168
2169         FreeXid(xid);
2170         return rc;
2171 }
2172
2173 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2174 {
2175         struct cifsFileInfo *open_file;
2176
2177         read_lock(&GlobalSMBSeslock);
2178         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2179                 if (open_file->closePend)
2180                         continue;
2181                 if (open_file->pfile &&
2182                     ((open_file->pfile->f_flags & O_RDWR) ||
2183                      (open_file->pfile->f_flags & O_WRONLY))) {
2184                         read_unlock(&GlobalSMBSeslock);
2185                         return 1;
2186                 }
2187         }
2188         read_unlock(&GlobalSMBSeslock);
2189         return 0;
2190 }
2191
2192 /* We do not want to update the file size from server for inodes
2193    open for write - to avoid races with writepage extending
2194    the file - in the future we could consider allowing
2195    refreshing the inode only on increases in the file size
2196    but this is tricky to do without racing with writebehind
2197    page caching in the current Linux kernel design */
2198 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2199 {
2200         if (!cifsInode)
2201                 return true;
2202
2203         if (is_inode_writable(cifsInode)) {
2204                 /* This inode is open for write at least once */
2205                 struct cifs_sb_info *cifs_sb;
2206
2207                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2208                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2209                         /* since no page cache to corrupt on directio
2210                         we can change size safely */
2211                         return true;
2212                 }
2213
2214                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2215                         return true;
2216
2217                 return false;
2218         } else
2219                 return true;
2220 }
2221
2222 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2223                         loff_t pos, unsigned len, unsigned flags,
2224                         struct page **pagep, void **fsdata)
2225 {
2226         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2227         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2228         loff_t page_start = pos & PAGE_MASK;
2229         loff_t i_size;
2230         struct page *page;
2231         int rc = 0;
2232
2233         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2234
2235         page = grab_cache_page_write_begin(mapping, index, flags);
2236         if (!page) {
2237                 rc = -ENOMEM;
2238                 goto out;
2239         }
2240
2241         if (PageUptodate(page))
2242                 goto out;
2243
2244         /*
2245          * If we write a full page it will be up to date, no need to read from
2246          * the server. If the write is short, we'll end up doing a sync write
2247          * instead.
2248          */
2249         if (len == PAGE_CACHE_SIZE)
2250                 goto out;
2251
2252         /*
2253          * optimize away the read when we have an oplock, and we're not
2254          * expecting to use any of the data we'd be reading in. That
2255          * is, when the page lies beyond the EOF, or straddles the EOF
2256          * and the write will cover all of the existing data.
2257          */
2258         if (CIFS_I(mapping->host)->clientCanCacheRead) {
2259                 i_size = i_size_read(mapping->host);
2260                 if (page_start >= i_size ||
2261                     (offset == 0 && (pos + len) >= i_size)) {
2262                         zero_user_segments(page, 0, offset,
2263                                            offset + len,
2264                                            PAGE_CACHE_SIZE);
2265                         /*
2266                          * PageChecked means that the parts of the page
2267                          * to which we're not writing are considered up
2268                          * to date. Once the data is copied to the
2269                          * page, it can be set uptodate.
2270                          */
2271                         SetPageChecked(page);
2272                         goto out;
2273                 }
2274         }
2275
2276         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2277                 /*
2278                  * might as well read a page, it is fast enough. If we get
2279                  * an error, we don't need to return it. cifs_write_end will
2280                  * do a sync write instead since PG_uptodate isn't set.
2281                  */
2282                 cifs_readpage_worker(file, page, &page_start);
2283         } else {
2284                 /* we could try using another file handle if there is one -
2285                    but how would we lock it to prevent close of that handle
2286                    racing with this read? In any case
2287                    this will be written out by write_end so is fine */
2288         }
2289 out:
2290         *pagep = page;
2291         return rc;
2292 }
2293
2294 static int cifs_release_page(struct page *page, gfp_t gfp)
2295 {
2296         if (PagePrivate(page))
2297                 return 0;
2298
2299         return cifs_fscache_release_page(page, gfp);
2300 }
2301
2302 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2303 {
2304         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2305
2306         if (offset == 0)
2307                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2308 }
2309
2310 static void
2311 cifs_oplock_break(struct slow_work *work)
2312 {
2313         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2314                                                   oplock_break);
2315         struct inode *inode = cfile->pInode;
2316         struct cifsInodeInfo *cinode = CIFS_I(inode);
2317         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb);
2318         int rc, waitrc = 0;
2319
2320         if (inode && S_ISREG(inode->i_mode)) {
2321                 if (cinode->clientCanCacheRead)
2322                         break_lease(inode, O_RDONLY);
2323                 else
2324                         break_lease(inode, O_WRONLY);
2325                 rc = filemap_fdatawrite(inode->i_mapping);
2326                 if (cinode->clientCanCacheRead == 0) {
2327                         waitrc = filemap_fdatawait(inode->i_mapping);
2328                         invalidate_remote_inode(inode);
2329                 }
2330                 if (!rc)
2331                         rc = waitrc;
2332                 if (rc)
2333                         cinode->write_behind_rc = rc;
2334                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2335         }
2336
2337         /*
2338          * releasing stale oplock after recent reconnect of smb session using
2339          * a now incorrect file handle is not a data integrity issue but do
2340          * not bother sending an oplock release if session to server still is
2341          * disconnected since oplock already released by the server
2342          */
2343         if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2344                 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2345                                  LOCKING_ANDX_OPLOCK_RELEASE, false);
2346                 cFYI(1, "Oplock release rc = %d", rc);
2347         }
2348 }
2349
2350 static int
2351 cifs_oplock_break_get(struct slow_work *work)
2352 {
2353         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2354                                                   oplock_break);
2355         mntget(cfile->mnt);
2356         cifsFileInfo_get(cfile);
2357         return 0;
2358 }
2359
2360 static void
2361 cifs_oplock_break_put(struct slow_work *work)
2362 {
2363         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2364                                                   oplock_break);
2365         mntput(cfile->mnt);
2366         cifsFileInfo_put(cfile);
2367 }
2368
2369 const struct slow_work_ops cifs_oplock_break_ops = {
2370         .get_ref        = cifs_oplock_break_get,
2371         .put_ref        = cifs_oplock_break_put,
2372         .execute        = cifs_oplock_break,
2373 };
2374
2375 const struct address_space_operations cifs_addr_ops = {
2376         .readpage = cifs_readpage,
2377         .readpages = cifs_readpages,
2378         .writepage = cifs_writepage,
2379         .writepages = cifs_writepages,
2380         .write_begin = cifs_write_begin,
2381         .write_end = cifs_write_end,
2382         .set_page_dirty = __set_page_dirty_nobuffers,
2383         .releasepage = cifs_release_page,
2384         .invalidatepage = cifs_invalidate_page,
2385         /* .sync_page = cifs_sync_page, */
2386         /* .direct_IO = */
2387 };
2388
2389 /*
2390  * cifs_readpages requires the server to support a buffer large enough to
2391  * contain the header plus one complete page of data.  Otherwise, we need
2392  * to leave cifs_readpages out of the address space operations.
2393  */
2394 const struct address_space_operations cifs_addr_ops_smallbuf = {
2395         .readpage = cifs_readpage,
2396         .writepage = cifs_writepage,
2397         .writepages = cifs_writepages,
2398         .write_begin = cifs_write_begin,
2399         .write_end = cifs_write_end,
2400         .set_page_dirty = __set_page_dirty_nobuffers,
2401         .releasepage = cifs_release_page,
2402         .invalidatepage = cifs_invalidate_page,
2403         /* .sync_page = cifs_sync_page, */
2404         /* .direct_IO = */
2405 };