Merge tag 'linux-kselftest-5.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50         if ((flags & O_ACCMODE) == O_RDONLY)
51                 return GENERIC_READ;
52         else if ((flags & O_ACCMODE) == O_WRONLY)
53                 return GENERIC_WRITE;
54         else if ((flags & O_ACCMODE) == O_RDWR) {
55                 /* GENERIC_ALL is too much permission to request
56                    can cause unnecessary access denied on create */
57                 /* return GENERIC_ALL; */
58                 return (GENERIC_READ | GENERIC_WRITE);
59         }
60
61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63                 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68         u32 posix_flags = 0;
69
70         if ((flags & O_ACCMODE) == O_RDONLY)
71                 posix_flags = SMB_O_RDONLY;
72         else if ((flags & O_ACCMODE) == O_WRONLY)
73                 posix_flags = SMB_O_WRONLY;
74         else if ((flags & O_ACCMODE) == O_RDWR)
75                 posix_flags = SMB_O_RDWR;
76
77         if (flags & O_CREAT) {
78                 posix_flags |= SMB_O_CREAT;
79                 if (flags & O_EXCL)
80                         posix_flags |= SMB_O_EXCL;
81         } else if (flags & O_EXCL)
82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83                          current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
114 int cifs_posix_open(char *full_path, struct inode **pinode,
115                         struct super_block *sb, int mode, unsigned int f_flags,
116                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
117 {
118         int rc;
119         FILE_UNIX_BASIC_INFO *presp_data;
120         __u32 posix_flags = 0;
121         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
122         struct cifs_fattr fattr;
123         struct tcon_link *tlink;
124         struct cifs_tcon *tcon;
125
126         cifs_dbg(FYI, "posix open %s\n", full_path);
127
128         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
129         if (presp_data == NULL)
130                 return -ENOMEM;
131
132         tlink = cifs_sb_tlink(cifs_sb);
133         if (IS_ERR(tlink)) {
134                 rc = PTR_ERR(tlink);
135                 goto posix_open_ret;
136         }
137
138         tcon = tlink_tcon(tlink);
139         mode &= ~current_umask();
140
141         posix_flags = cifs_posix_convert_flags(f_flags);
142         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
143                              poplock, full_path, cifs_sb->local_nls,
144                              cifs_remap(cifs_sb));
145         cifs_put_tlink(tlink);
146
147         if (rc)
148                 goto posix_open_ret;
149
150         if (presp_data->Type == cpu_to_le32(-1))
151                 goto posix_open_ret; /* open ok, caller does qpathinfo */
152
153         if (!pinode)
154                 goto posix_open_ret; /* caller does not need info */
155
156         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157
158         /* get new inode and set it up */
159         if (*pinode == NULL) {
160                 cifs_fill_uniqueid(sb, &fattr);
161                 *pinode = cifs_iget(sb, &fattr);
162                 if (!*pinode) {
163                         rc = -ENOMEM;
164                         goto posix_open_ret;
165                 }
166         } else {
167                 cifs_fattr_to_inode(*pinode, &fattr);
168         }
169
170 posix_open_ret:
171         kfree(presp_data);
172         return rc;
173 }
174
175 static int
176 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
177              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
178              struct cifs_fid *fid, unsigned int xid)
179 {
180         int rc;
181         int desired_access;
182         int disposition;
183         int create_options = CREATE_NOT_DIR;
184         FILE_ALL_INFO *buf;
185         struct TCP_Server_Info *server = tcon->ses->server;
186         struct cifs_open_parms oparms;
187
188         if (!server->ops->open)
189                 return -ENOSYS;
190
191         desired_access = cifs_convert_flags(f_flags);
192
193 /*********************************************************************
194  *  open flag mapping table:
195  *
196  *      POSIX Flag            CIFS Disposition
197  *      ----------            ----------------
198  *      O_CREAT               FILE_OPEN_IF
199  *      O_CREAT | O_EXCL      FILE_CREATE
200  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
201  *      O_TRUNC               FILE_OVERWRITE
202  *      none of the above     FILE_OPEN
203  *
204  *      Note that there is not a direct match between disposition
205  *      FILE_SUPERSEDE (ie create whether or not file exists although
206  *      O_CREAT | O_TRUNC is similar but truncates the existing
207  *      file rather than creating a new file as FILE_SUPERSEDE does
208  *      (which uses the attributes / metadata passed in on open call)
209  *?
210  *?  O_SYNC is a reasonable match to CIFS writethrough flag
211  *?  and the read write flags match reasonably.  O_LARGEFILE
212  *?  is irrelevant because largefile support is always used
213  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
214  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
215  *********************************************************************/
216
217         disposition = cifs_get_disposition(f_flags);
218
219         /* BB pass O_SYNC flag through on file attributes .. BB */
220
221         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
222         if (!buf)
223                 return -ENOMEM;
224
225         if (backup_cred(cifs_sb))
226                 create_options |= CREATE_OPEN_BACKUP_INTENT;
227
228         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
229         if (f_flags & O_SYNC)
230                 create_options |= CREATE_WRITE_THROUGH;
231
232         if (f_flags & O_DIRECT)
233                 create_options |= CREATE_NO_BUFFER;
234
235         oparms.tcon = tcon;
236         oparms.cifs_sb = cifs_sb;
237         oparms.desired_access = desired_access;
238         oparms.create_options = create_options;
239         oparms.disposition = disposition;
240         oparms.path = full_path;
241         oparms.fid = fid;
242         oparms.reconnect = false;
243
244         rc = server->ops->open(xid, &oparms, oplock, buf);
245
246         if (rc)
247                 goto out;
248
249         if (tcon->unix_ext)
250                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
251                                               xid);
252         else
253                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
254                                          xid, fid);
255
256 out:
257         kfree(buf);
258         return rc;
259 }
260
261 static bool
262 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 {
264         struct cifs_fid_locks *cur;
265         bool has_locks = false;
266
267         down_read(&cinode->lock_sem);
268         list_for_each_entry(cur, &cinode->llist, llist) {
269                 if (!list_empty(&cur->locks)) {
270                         has_locks = true;
271                         break;
272                 }
273         }
274         up_read(&cinode->lock_sem);
275         return has_locks;
276 }
277
278 struct cifsFileInfo *
279 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
280                   struct tcon_link *tlink, __u32 oplock)
281 {
282         struct dentry *dentry = file_dentry(file);
283         struct inode *inode = d_inode(dentry);
284         struct cifsInodeInfo *cinode = CIFS_I(inode);
285         struct cifsFileInfo *cfile;
286         struct cifs_fid_locks *fdlocks;
287         struct cifs_tcon *tcon = tlink_tcon(tlink);
288         struct TCP_Server_Info *server = tcon->ses->server;
289
290         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
291         if (cfile == NULL)
292                 return cfile;
293
294         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
295         if (!fdlocks) {
296                 kfree(cfile);
297                 return NULL;
298         }
299
300         INIT_LIST_HEAD(&fdlocks->locks);
301         fdlocks->cfile = cfile;
302         cfile->llist = fdlocks;
303         down_write(&cinode->lock_sem);
304         list_add(&fdlocks->llist, &cinode->llist);
305         up_write(&cinode->lock_sem);
306
307         cfile->count = 1;
308         cfile->pid = current->tgid;
309         cfile->uid = current_fsuid();
310         cfile->dentry = dget(dentry);
311         cfile->f_flags = file->f_flags;
312         cfile->invalidHandle = false;
313         cfile->tlink = cifs_get_tlink(tlink);
314         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
315         mutex_init(&cfile->fh_mutex);
316         spin_lock_init(&cfile->file_info_lock);
317
318         cifs_sb_active(inode->i_sb);
319
320         /*
321          * If the server returned a read oplock and we have mandatory brlocks,
322          * set oplock level to None.
323          */
324         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
325                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
326                 oplock = 0;
327         }
328
329         spin_lock(&tcon->open_file_lock);
330         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
331                 oplock = fid->pending_open->oplock;
332         list_del(&fid->pending_open->olist);
333
334         fid->purge_cache = false;
335         server->ops->set_fid(cfile, fid, oplock);
336
337         list_add(&cfile->tlist, &tcon->openFileList);
338         atomic_inc(&tcon->num_local_opens);
339
340         /* if readable file instance put first in list*/
341         if (file->f_mode & FMODE_READ)
342                 list_add(&cfile->flist, &cinode->openFileList);
343         else
344                 list_add_tail(&cfile->flist, &cinode->openFileList);
345         spin_unlock(&tcon->open_file_lock);
346
347         if (fid->purge_cache)
348                 cifs_zap_mapping(inode);
349
350         file->private_data = cfile;
351         return cfile;
352 }
353
354 struct cifsFileInfo *
355 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
356 {
357         spin_lock(&cifs_file->file_info_lock);
358         cifsFileInfo_get_locked(cifs_file);
359         spin_unlock(&cifs_file->file_info_lock);
360         return cifs_file;
361 }
362
363 /*
364  * Release a reference on the file private data. This may involve closing
365  * the filehandle out on the server. Must be called without holding
366  * tcon->open_file_lock and cifs_file->file_info_lock.
367  */
368 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
369 {
370         struct inode *inode = d_inode(cifs_file->dentry);
371         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372         struct TCP_Server_Info *server = tcon->ses->server;
373         struct cifsInodeInfo *cifsi = CIFS_I(inode);
374         struct super_block *sb = inode->i_sb;
375         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376         struct cifsLockInfo *li, *tmp;
377         struct cifs_fid fid;
378         struct cifs_pending_open open;
379         bool oplock_break_cancelled;
380
381         spin_lock(&tcon->open_file_lock);
382
383         spin_lock(&cifs_file->file_info_lock);
384         if (--cifs_file->count > 0) {
385                 spin_unlock(&cifs_file->file_info_lock);
386                 spin_unlock(&tcon->open_file_lock);
387                 return;
388         }
389         spin_unlock(&cifs_file->file_info_lock);
390
391         if (server->ops->get_lease_key)
392                 server->ops->get_lease_key(inode, &fid);
393
394         /* store open in pending opens to make sure we don't miss lease break */
395         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
396
397         /* remove it from the lists */
398         list_del(&cifs_file->flist);
399         list_del(&cifs_file->tlist);
400         atomic_dec(&tcon->num_local_opens);
401
402         if (list_empty(&cifsi->openFileList)) {
403                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
404                          d_inode(cifs_file->dentry));
405                 /*
406                  * In strict cache mode we need invalidate mapping on the last
407                  * close  because it may cause a error when we open this file
408                  * again and get at least level II oplock.
409                  */
410                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412                 cifs_set_oplock_level(cifsi, 0);
413         }
414
415         spin_unlock(&tcon->open_file_lock);
416
417         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418
419         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420                 struct TCP_Server_Info *server = tcon->ses->server;
421                 unsigned int xid;
422
423                 xid = get_xid();
424                 if (server->ops->close)
425                         server->ops->close(xid, tcon, &cifs_file->fid);
426                 _free_xid(xid);
427         }
428
429         if (oplock_break_cancelled)
430                 cifs_done_oplock_break(cifsi);
431
432         cifs_del_pending_open(&open);
433
434         /*
435          * Delete any outstanding lock records. We'll lose them when the file
436          * is closed anyway.
437          */
438         down_write(&cifsi->lock_sem);
439         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440                 list_del(&li->llist);
441                 cifs_del_lock_waiters(li);
442                 kfree(li);
443         }
444         list_del(&cifs_file->llist->llist);
445         kfree(cifs_file->llist);
446         up_write(&cifsi->lock_sem);
447
448         cifs_put_tlink(cifs_file->tlink);
449         dput(cifs_file->dentry);
450         cifs_sb_deactive(sb);
451         kfree(cifs_file);
452 }
453
454 int cifs_open(struct inode *inode, struct file *file)
455
456 {
457         int rc = -EACCES;
458         unsigned int xid;
459         __u32 oplock;
460         struct cifs_sb_info *cifs_sb;
461         struct TCP_Server_Info *server;
462         struct cifs_tcon *tcon;
463         struct tcon_link *tlink;
464         struct cifsFileInfo *cfile = NULL;
465         char *full_path = NULL;
466         bool posix_open_ok = false;
467         struct cifs_fid fid;
468         struct cifs_pending_open open;
469
470         xid = get_xid();
471
472         cifs_sb = CIFS_SB(inode->i_sb);
473         tlink = cifs_sb_tlink(cifs_sb);
474         if (IS_ERR(tlink)) {
475                 free_xid(xid);
476                 return PTR_ERR(tlink);
477         }
478         tcon = tlink_tcon(tlink);
479         server = tcon->ses->server;
480
481         full_path = build_path_from_dentry(file_dentry(file));
482         if (full_path == NULL) {
483                 rc = -ENOMEM;
484                 goto out;
485         }
486
487         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
488                  inode, file->f_flags, full_path);
489
490         if (file->f_flags & O_DIRECT &&
491             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
492                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
493                         file->f_op = &cifs_file_direct_nobrl_ops;
494                 else
495                         file->f_op = &cifs_file_direct_ops;
496         }
497
498         if (server->oplocks)
499                 oplock = REQ_OPLOCK;
500         else
501                 oplock = 0;
502
503         if (!tcon->broken_posix_open && tcon->unix_ext &&
504             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
505                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
506                 /* can not refresh inode info since size could be stale */
507                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
508                                 cifs_sb->mnt_file_mode /* ignored */,
509                                 file->f_flags, &oplock, &fid.netfid, xid);
510                 if (rc == 0) {
511                         cifs_dbg(FYI, "posix open succeeded\n");
512                         posix_open_ok = true;
513                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
514                         if (tcon->ses->serverNOS)
515                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
516                                          tcon->ses->serverName,
517                                          tcon->ses->serverNOS);
518                         tcon->broken_posix_open = true;
519                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
520                          (rc != -EOPNOTSUPP)) /* path not found or net err */
521                         goto out;
522                 /*
523                  * Else fallthrough to retry open the old way on network i/o
524                  * or DFS errors.
525                  */
526         }
527
528         if (server->ops->get_lease_key)
529                 server->ops->get_lease_key(inode, &fid);
530
531         cifs_add_pending_open(&fid, tlink, &open);
532
533         if (!posix_open_ok) {
534                 if (server->ops->get_lease_key)
535                         server->ops->get_lease_key(inode, &fid);
536
537                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
538                                   file->f_flags, &oplock, &fid, xid);
539                 if (rc) {
540                         cifs_del_pending_open(&open);
541                         goto out;
542                 }
543         }
544
545         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
546         if (cfile == NULL) {
547                 if (server->ops->close)
548                         server->ops->close(xid, tcon, &fid);
549                 cifs_del_pending_open(&open);
550                 rc = -ENOMEM;
551                 goto out;
552         }
553
554         cifs_fscache_set_inode_cookie(inode, file);
555
556         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
557                 /*
558                  * Time to set mode which we can not set earlier due to
559                  * problems creating new read-only files.
560                  */
561                 struct cifs_unix_set_info_args args = {
562                         .mode   = inode->i_mode,
563                         .uid    = INVALID_UID, /* no change */
564                         .gid    = INVALID_GID, /* no change */
565                         .ctime  = NO_CHANGE_64,
566                         .atime  = NO_CHANGE_64,
567                         .mtime  = NO_CHANGE_64,
568                         .device = 0,
569                 };
570                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
571                                        cfile->pid);
572         }
573
574 out:
575         kfree(full_path);
576         free_xid(xid);
577         cifs_put_tlink(tlink);
578         return rc;
579 }
580
581 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
582
583 /*
584  * Try to reacquire byte range locks that were released when session
585  * to server was lost.
586  */
587 static int
588 cifs_relock_file(struct cifsFileInfo *cfile)
589 {
590         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
591         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
592         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
593         int rc = 0;
594
595         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
596         if (cinode->can_cache_brlcks) {
597                 /* can cache locks - no need to relock */
598                 up_read(&cinode->lock_sem);
599                 return rc;
600         }
601
602         if (cap_unix(tcon->ses) &&
603             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
604             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
605                 rc = cifs_push_posix_locks(cfile);
606         else
607                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
608
609         up_read(&cinode->lock_sem);
610         return rc;
611 }
612
613 static int
614 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
615 {
616         int rc = -EACCES;
617         unsigned int xid;
618         __u32 oplock;
619         struct cifs_sb_info *cifs_sb;
620         struct cifs_tcon *tcon;
621         struct TCP_Server_Info *server;
622         struct cifsInodeInfo *cinode;
623         struct inode *inode;
624         char *full_path = NULL;
625         int desired_access;
626         int disposition = FILE_OPEN;
627         int create_options = CREATE_NOT_DIR;
628         struct cifs_open_parms oparms;
629
630         xid = get_xid();
631         mutex_lock(&cfile->fh_mutex);
632         if (!cfile->invalidHandle) {
633                 mutex_unlock(&cfile->fh_mutex);
634                 rc = 0;
635                 free_xid(xid);
636                 return rc;
637         }
638
639         inode = d_inode(cfile->dentry);
640         cifs_sb = CIFS_SB(inode->i_sb);
641         tcon = tlink_tcon(cfile->tlink);
642         server = tcon->ses->server;
643
644         /*
645          * Can not grab rename sem here because various ops, including those
646          * that already have the rename sem can end up causing writepage to get
647          * called and if the server was down that means we end up here, and we
648          * can never tell if the caller already has the rename_sem.
649          */
650         full_path = build_path_from_dentry(cfile->dentry);
651         if (full_path == NULL) {
652                 rc = -ENOMEM;
653                 mutex_unlock(&cfile->fh_mutex);
654                 free_xid(xid);
655                 return rc;
656         }
657
658         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
659                  inode, cfile->f_flags, full_path);
660
661         if (tcon->ses->server->oplocks)
662                 oplock = REQ_OPLOCK;
663         else
664                 oplock = 0;
665
666         if (tcon->unix_ext && cap_unix(tcon->ses) &&
667             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
668                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
669                 /*
670                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
671                  * original open. Must mask them off for a reopen.
672                  */
673                 unsigned int oflags = cfile->f_flags &
674                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
675
676                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
677                                      cifs_sb->mnt_file_mode /* ignored */,
678                                      oflags, &oplock, &cfile->fid.netfid, xid);
679                 if (rc == 0) {
680                         cifs_dbg(FYI, "posix reopen succeeded\n");
681                         oparms.reconnect = true;
682                         goto reopen_success;
683                 }
684                 /*
685                  * fallthrough to retry open the old way on errors, especially
686                  * in the reconnect path it is important to retry hard
687                  */
688         }
689
690         desired_access = cifs_convert_flags(cfile->f_flags);
691
692         if (backup_cred(cifs_sb))
693                 create_options |= CREATE_OPEN_BACKUP_INTENT;
694
695         if (server->ops->get_lease_key)
696                 server->ops->get_lease_key(inode, &cfile->fid);
697
698         oparms.tcon = tcon;
699         oparms.cifs_sb = cifs_sb;
700         oparms.desired_access = desired_access;
701         oparms.create_options = create_options;
702         oparms.disposition = disposition;
703         oparms.path = full_path;
704         oparms.fid = &cfile->fid;
705         oparms.reconnect = true;
706
707         /*
708          * Can not refresh inode by passing in file_info buf to be returned by
709          * ops->open and then calling get_inode_info with returned buf since
710          * file might have write behind data that needs to be flushed and server
711          * version of file size can be stale. If we knew for sure that inode was
712          * not dirty locally we could do this.
713          */
714         rc = server->ops->open(xid, &oparms, &oplock, NULL);
715         if (rc == -ENOENT && oparms.reconnect == false) {
716                 /* durable handle timeout is expired - open the file again */
717                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
718                 /* indicate that we need to relock the file */
719                 oparms.reconnect = true;
720         }
721
722         if (rc) {
723                 mutex_unlock(&cfile->fh_mutex);
724                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
725                 cifs_dbg(FYI, "oplock: %d\n", oplock);
726                 goto reopen_error_exit;
727         }
728
729 reopen_success:
730         cfile->invalidHandle = false;
731         mutex_unlock(&cfile->fh_mutex);
732         cinode = CIFS_I(inode);
733
734         if (can_flush) {
735                 rc = filemap_write_and_wait(inode->i_mapping);
736                 if (!is_interrupt_error(rc))
737                         mapping_set_error(inode->i_mapping, rc);
738
739                 if (tcon->unix_ext)
740                         rc = cifs_get_inode_info_unix(&inode, full_path,
741                                                       inode->i_sb, xid);
742                 else
743                         rc = cifs_get_inode_info(&inode, full_path, NULL,
744                                                  inode->i_sb, xid, NULL);
745         }
746         /*
747          * Else we are writing out data to server already and could deadlock if
748          * we tried to flush data, and since we do not know if we have data that
749          * would invalidate the current end of file on the server we can not go
750          * to the server to get the new inode info.
751          */
752
753         /*
754          * If the server returned a read oplock and we have mandatory brlocks,
755          * set oplock level to None.
756          */
757         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
758                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
759                 oplock = 0;
760         }
761
762         server->ops->set_fid(cfile, &cfile->fid, oplock);
763         if (oparms.reconnect)
764                 cifs_relock_file(cfile);
765
766 reopen_error_exit:
767         kfree(full_path);
768         free_xid(xid);
769         return rc;
770 }
771
772 int cifs_close(struct inode *inode, struct file *file)
773 {
774         if (file->private_data != NULL) {
775                 cifsFileInfo_put(file->private_data);
776                 file->private_data = NULL;
777         }
778
779         /* return code from the ->release op is always ignored */
780         return 0;
781 }
782
783 void
784 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
785 {
786         struct cifsFileInfo *open_file;
787         struct list_head *tmp;
788         struct list_head *tmp1;
789         struct list_head tmp_list;
790
791         if (!tcon->use_persistent || !tcon->need_reopen_files)
792                 return;
793
794         tcon->need_reopen_files = false;
795
796         cifs_dbg(FYI, "Reopen persistent handles");
797         INIT_LIST_HEAD(&tmp_list);
798
799         /* list all files open on tree connection, reopen resilient handles  */
800         spin_lock(&tcon->open_file_lock);
801         list_for_each(tmp, &tcon->openFileList) {
802                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803                 if (!open_file->invalidHandle)
804                         continue;
805                 cifsFileInfo_get(open_file);
806                 list_add_tail(&open_file->rlist, &tmp_list);
807         }
808         spin_unlock(&tcon->open_file_lock);
809
810         list_for_each_safe(tmp, tmp1, &tmp_list) {
811                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812                 if (cifs_reopen_file(open_file, false /* do not flush */))
813                         tcon->need_reopen_files = true;
814                 list_del_init(&open_file->rlist);
815                 cifsFileInfo_put(open_file);
816         }
817 }
818
819 int cifs_closedir(struct inode *inode, struct file *file)
820 {
821         int rc = 0;
822         unsigned int xid;
823         struct cifsFileInfo *cfile = file->private_data;
824         struct cifs_tcon *tcon;
825         struct TCP_Server_Info *server;
826         char *buf;
827
828         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
829
830         if (cfile == NULL)
831                 return rc;
832
833         xid = get_xid();
834         tcon = tlink_tcon(cfile->tlink);
835         server = tcon->ses->server;
836
837         cifs_dbg(FYI, "Freeing private data in close dir\n");
838         spin_lock(&cfile->file_info_lock);
839         if (server->ops->dir_needs_close(cfile)) {
840                 cfile->invalidHandle = true;
841                 spin_unlock(&cfile->file_info_lock);
842                 if (server->ops->close_dir)
843                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
844                 else
845                         rc = -ENOSYS;
846                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
847                 /* not much we can do if it fails anyway, ignore rc */
848                 rc = 0;
849         } else
850                 spin_unlock(&cfile->file_info_lock);
851
852         buf = cfile->srch_inf.ntwrk_buf_start;
853         if (buf) {
854                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
855                 cfile->srch_inf.ntwrk_buf_start = NULL;
856                 if (cfile->srch_inf.smallBuf)
857                         cifs_small_buf_release(buf);
858                 else
859                         cifs_buf_release(buf);
860         }
861
862         cifs_put_tlink(cfile->tlink);
863         kfree(file->private_data);
864         file->private_data = NULL;
865         /* BB can we lock the filestruct while this is going on? */
866         free_xid(xid);
867         return rc;
868 }
869
870 static struct cifsLockInfo *
871 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
872 {
873         struct cifsLockInfo *lock =
874                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875         if (!lock)
876                 return lock;
877         lock->offset = offset;
878         lock->length = length;
879         lock->type = type;
880         lock->pid = current->tgid;
881         lock->flags = flags;
882         INIT_LIST_HEAD(&lock->blist);
883         init_waitqueue_head(&lock->block_q);
884         return lock;
885 }
886
887 void
888 cifs_del_lock_waiters(struct cifsLockInfo *lock)
889 {
890         struct cifsLockInfo *li, *tmp;
891         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
892                 list_del_init(&li->blist);
893                 wake_up(&li->block_q);
894         }
895 }
896
897 #define CIFS_LOCK_OP    0
898 #define CIFS_READ_OP    1
899 #define CIFS_WRITE_OP   2
900
901 /* @rw_check : 0 - no op, 1 - read, 2 - write */
902 static bool
903 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
904                             __u64 length, __u8 type, __u16 flags,
905                             struct cifsFileInfo *cfile,
906                             struct cifsLockInfo **conf_lock, int rw_check)
907 {
908         struct cifsLockInfo *li;
909         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
910         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
911
912         list_for_each_entry(li, &fdlocks->locks, llist) {
913                 if (offset + length <= li->offset ||
914                     offset >= li->offset + li->length)
915                         continue;
916                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
917                     server->ops->compare_fids(cfile, cur_cfile)) {
918                         /* shared lock prevents write op through the same fid */
919                         if (!(li->type & server->vals->shared_lock_type) ||
920                             rw_check != CIFS_WRITE_OP)
921                                 continue;
922                 }
923                 if ((type & server->vals->shared_lock_type) &&
924                     ((server->ops->compare_fids(cfile, cur_cfile) &&
925                      current->tgid == li->pid) || type == li->type))
926                         continue;
927                 if (rw_check == CIFS_LOCK_OP &&
928                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
929                     server->ops->compare_fids(cfile, cur_cfile))
930                         continue;
931                 if (conf_lock)
932                         *conf_lock = li;
933                 return true;
934         }
935         return false;
936 }
937
938 bool
939 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940                         __u8 type, __u16 flags,
941                         struct cifsLockInfo **conf_lock, int rw_check)
942 {
943         bool rc = false;
944         struct cifs_fid_locks *cur;
945         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946
947         list_for_each_entry(cur, &cinode->llist, llist) {
948                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949                                                  flags, cfile, conf_lock,
950                                                  rw_check);
951                 if (rc)
952                         break;
953         }
954
955         return rc;
956 }
957
958 /*
959  * Check if there is another lock that prevents us to set the lock (mandatory
960  * style). If such a lock exists, update the flock structure with its
961  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
962  * or leave it the same if we can't. Returns 0 if we don't need to request to
963  * the server or 1 otherwise.
964  */
965 static int
966 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
967                __u8 type, struct file_lock *flock)
968 {
969         int rc = 0;
970         struct cifsLockInfo *conf_lock;
971         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
972         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
973         bool exist;
974
975         down_read(&cinode->lock_sem);
976
977         exist = cifs_find_lock_conflict(cfile, offset, length, type,
978                                         flock->fl_flags, &conf_lock,
979                                         CIFS_LOCK_OP);
980         if (exist) {
981                 flock->fl_start = conf_lock->offset;
982                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
983                 flock->fl_pid = conf_lock->pid;
984                 if (conf_lock->type & server->vals->shared_lock_type)
985                         flock->fl_type = F_RDLCK;
986                 else
987                         flock->fl_type = F_WRLCK;
988         } else if (!cinode->can_cache_brlcks)
989                 rc = 1;
990         else
991                 flock->fl_type = F_UNLCK;
992
993         up_read(&cinode->lock_sem);
994         return rc;
995 }
996
997 static void
998 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
999 {
1000         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1001         down_write(&cinode->lock_sem);
1002         list_add_tail(&lock->llist, &cfile->llist->locks);
1003         up_write(&cinode->lock_sem);
1004 }
1005
1006 /*
1007  * Set the byte-range lock (mandatory style). Returns:
1008  * 1) 0, if we set the lock and don't need to request to the server;
1009  * 2) 1, if no locks prevent us but we need to request to the server;
1010  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1011  */
1012 static int
1013 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1014                  bool wait)
1015 {
1016         struct cifsLockInfo *conf_lock;
1017         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1018         bool exist;
1019         int rc = 0;
1020
1021 try_again:
1022         exist = false;
1023         down_write(&cinode->lock_sem);
1024
1025         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1026                                         lock->type, lock->flags, &conf_lock,
1027                                         CIFS_LOCK_OP);
1028         if (!exist && cinode->can_cache_brlcks) {
1029                 list_add_tail(&lock->llist, &cfile->llist->locks);
1030                 up_write(&cinode->lock_sem);
1031                 return rc;
1032         }
1033
1034         if (!exist)
1035                 rc = 1;
1036         else if (!wait)
1037                 rc = -EACCES;
1038         else {
1039                 list_add_tail(&lock->blist, &conf_lock->blist);
1040                 up_write(&cinode->lock_sem);
1041                 rc = wait_event_interruptible(lock->block_q,
1042                                         (lock->blist.prev == &lock->blist) &&
1043                                         (lock->blist.next == &lock->blist));
1044                 if (!rc)
1045                         goto try_again;
1046                 down_write(&cinode->lock_sem);
1047                 list_del_init(&lock->blist);
1048         }
1049
1050         up_write(&cinode->lock_sem);
1051         return rc;
1052 }
1053
1054 /*
1055  * Check if there is another lock that prevents us to set the lock (posix
1056  * style). If such a lock exists, update the flock structure with its
1057  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1058  * or leave it the same if we can't. Returns 0 if we don't need to request to
1059  * the server or 1 otherwise.
1060  */
1061 static int
1062 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1063 {
1064         int rc = 0;
1065         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1066         unsigned char saved_type = flock->fl_type;
1067
1068         if ((flock->fl_flags & FL_POSIX) == 0)
1069                 return 1;
1070
1071         down_read(&cinode->lock_sem);
1072         posix_test_lock(file, flock);
1073
1074         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1075                 flock->fl_type = saved_type;
1076                 rc = 1;
1077         }
1078
1079         up_read(&cinode->lock_sem);
1080         return rc;
1081 }
1082
1083 /*
1084  * Set the byte-range lock (posix style). Returns:
1085  * 1) 0, if we set the lock and don't need to request to the server;
1086  * 2) 1, if we need to request to the server;
1087  * 3) <0, if the error occurs while setting the lock.
1088  */
1089 static int
1090 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1091 {
1092         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1093         int rc = 1;
1094
1095         if ((flock->fl_flags & FL_POSIX) == 0)
1096                 return rc;
1097
1098 try_again:
1099         down_write(&cinode->lock_sem);
1100         if (!cinode->can_cache_brlcks) {
1101                 up_write(&cinode->lock_sem);
1102                 return rc;
1103         }
1104
1105         rc = posix_lock_file(file, flock, NULL);
1106         up_write(&cinode->lock_sem);
1107         if (rc == FILE_LOCK_DEFERRED) {
1108                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
1109                 if (!rc)
1110                         goto try_again;
1111                 locks_delete_block(flock);
1112         }
1113         return rc;
1114 }
1115
1116 int
1117 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1118 {
1119         unsigned int xid;
1120         int rc = 0, stored_rc;
1121         struct cifsLockInfo *li, *tmp;
1122         struct cifs_tcon *tcon;
1123         unsigned int num, max_num, max_buf;
1124         LOCKING_ANDX_RANGE *buf, *cur;
1125         static const int types[] = {
1126                 LOCKING_ANDX_LARGE_FILES,
1127                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1128         };
1129         int i;
1130
1131         xid = get_xid();
1132         tcon = tlink_tcon(cfile->tlink);
1133
1134         /*
1135          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1136          * and check it before using.
1137          */
1138         max_buf = tcon->ses->server->maxBuf;
1139         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1140                 free_xid(xid);
1141                 return -EINVAL;
1142         }
1143
1144         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145                      PAGE_SIZE);
1146         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147                         PAGE_SIZE);
1148         max_num = (max_buf - sizeof(struct smb_hdr)) /
1149                                                 sizeof(LOCKING_ANDX_RANGE);
1150         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1151         if (!buf) {
1152                 free_xid(xid);
1153                 return -ENOMEM;
1154         }
1155
1156         for (i = 0; i < 2; i++) {
1157                 cur = buf;
1158                 num = 0;
1159                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1160                         if (li->type != types[i])
1161                                 continue;
1162                         cur->Pid = cpu_to_le16(li->pid);
1163                         cur->LengthLow = cpu_to_le32((u32)li->length);
1164                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1165                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1166                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1167                         if (++num == max_num) {
1168                                 stored_rc = cifs_lockv(xid, tcon,
1169                                                        cfile->fid.netfid,
1170                                                        (__u8)li->type, 0, num,
1171                                                        buf);
1172                                 if (stored_rc)
1173                                         rc = stored_rc;
1174                                 cur = buf;
1175                                 num = 0;
1176                         } else
1177                                 cur++;
1178                 }
1179
1180                 if (num) {
1181                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1182                                                (__u8)types[i], 0, num, buf);
1183                         if (stored_rc)
1184                                 rc = stored_rc;
1185                 }
1186         }
1187
1188         kfree(buf);
1189         free_xid(xid);
1190         return rc;
1191 }
1192
1193 static __u32
1194 hash_lockowner(fl_owner_t owner)
1195 {
1196         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1197 }
1198
1199 struct lock_to_push {
1200         struct list_head llist;
1201         __u64 offset;
1202         __u64 length;
1203         __u32 pid;
1204         __u16 netfid;
1205         __u8 type;
1206 };
1207
1208 static int
1209 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1210 {
1211         struct inode *inode = d_inode(cfile->dentry);
1212         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1213         struct file_lock *flock;
1214         struct file_lock_context *flctx = inode->i_flctx;
1215         unsigned int count = 0, i;
1216         int rc = 0, xid, type;
1217         struct list_head locks_to_send, *el;
1218         struct lock_to_push *lck, *tmp;
1219         __u64 length;
1220
1221         xid = get_xid();
1222
1223         if (!flctx)
1224                 goto out;
1225
1226         spin_lock(&flctx->flc_lock);
1227         list_for_each(el, &flctx->flc_posix) {
1228                 count++;
1229         }
1230         spin_unlock(&flctx->flc_lock);
1231
1232         INIT_LIST_HEAD(&locks_to_send);
1233
1234         /*
1235          * Allocating count locks is enough because no FL_POSIX locks can be
1236          * added to the list while we are holding cinode->lock_sem that
1237          * protects locking operations of this inode.
1238          */
1239         for (i = 0; i < count; i++) {
1240                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1241                 if (!lck) {
1242                         rc = -ENOMEM;
1243                         goto err_out;
1244                 }
1245                 list_add_tail(&lck->llist, &locks_to_send);
1246         }
1247
1248         el = locks_to_send.next;
1249         spin_lock(&flctx->flc_lock);
1250         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1251                 if (el == &locks_to_send) {
1252                         /*
1253                          * The list ended. We don't have enough allocated
1254                          * structures - something is really wrong.
1255                          */
1256                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1257                         break;
1258                 }
1259                 length = 1 + flock->fl_end - flock->fl_start;
1260                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1261                         type = CIFS_RDLCK;
1262                 else
1263                         type = CIFS_WRLCK;
1264                 lck = list_entry(el, struct lock_to_push, llist);
1265                 lck->pid = hash_lockowner(flock->fl_owner);
1266                 lck->netfid = cfile->fid.netfid;
1267                 lck->length = length;
1268                 lck->type = type;
1269                 lck->offset = flock->fl_start;
1270         }
1271         spin_unlock(&flctx->flc_lock);
1272
1273         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1274                 int stored_rc;
1275
1276                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1277                                              lck->offset, lck->length, NULL,
1278                                              lck->type, 0);
1279                 if (stored_rc)
1280                         rc = stored_rc;
1281                 list_del(&lck->llist);
1282                 kfree(lck);
1283         }
1284
1285 out:
1286         free_xid(xid);
1287         return rc;
1288 err_out:
1289         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1290                 list_del(&lck->llist);
1291                 kfree(lck);
1292         }
1293         goto out;
1294 }
1295
1296 static int
1297 cifs_push_locks(struct cifsFileInfo *cfile)
1298 {
1299         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1300         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1301         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1302         int rc = 0;
1303
1304         /* we are going to update can_cache_brlcks here - need a write access */
1305         down_write(&cinode->lock_sem);
1306         if (!cinode->can_cache_brlcks) {
1307                 up_write(&cinode->lock_sem);
1308                 return rc;
1309         }
1310
1311         if (cap_unix(tcon->ses) &&
1312             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1313             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1314                 rc = cifs_push_posix_locks(cfile);
1315         else
1316                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1317
1318         cinode->can_cache_brlcks = false;
1319         up_write(&cinode->lock_sem);
1320         return rc;
1321 }
1322
1323 static void
1324 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325                 bool *wait_flag, struct TCP_Server_Info *server)
1326 {
1327         if (flock->fl_flags & FL_POSIX)
1328                 cifs_dbg(FYI, "Posix\n");
1329         if (flock->fl_flags & FL_FLOCK)
1330                 cifs_dbg(FYI, "Flock\n");
1331         if (flock->fl_flags & FL_SLEEP) {
1332                 cifs_dbg(FYI, "Blocking lock\n");
1333                 *wait_flag = true;
1334         }
1335         if (flock->fl_flags & FL_ACCESS)
1336                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337         if (flock->fl_flags & FL_LEASE)
1338                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339         if (flock->fl_flags &
1340             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1343
1344         *type = server->vals->large_lock_type;
1345         if (flock->fl_type == F_WRLCK) {
1346                 cifs_dbg(FYI, "F_WRLCK\n");
1347                 *type |= server->vals->exclusive_lock_type;
1348                 *lock = 1;
1349         } else if (flock->fl_type == F_UNLCK) {
1350                 cifs_dbg(FYI, "F_UNLCK\n");
1351                 *type |= server->vals->unlock_lock_type;
1352                 *unlock = 1;
1353                 /* Check if unlock includes more than one lock range */
1354         } else if (flock->fl_type == F_RDLCK) {
1355                 cifs_dbg(FYI, "F_RDLCK\n");
1356                 *type |= server->vals->shared_lock_type;
1357                 *lock = 1;
1358         } else if (flock->fl_type == F_EXLCK) {
1359                 cifs_dbg(FYI, "F_EXLCK\n");
1360                 *type |= server->vals->exclusive_lock_type;
1361                 *lock = 1;
1362         } else if (flock->fl_type == F_SHLCK) {
1363                 cifs_dbg(FYI, "F_SHLCK\n");
1364                 *type |= server->vals->shared_lock_type;
1365                 *lock = 1;
1366         } else
1367                 cifs_dbg(FYI, "Unknown type of lock\n");
1368 }
1369
1370 static int
1371 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1372            bool wait_flag, bool posix_lck, unsigned int xid)
1373 {
1374         int rc = 0;
1375         __u64 length = 1 + flock->fl_end - flock->fl_start;
1376         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1377         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1378         struct TCP_Server_Info *server = tcon->ses->server;
1379         __u16 netfid = cfile->fid.netfid;
1380
1381         if (posix_lck) {
1382                 int posix_lock_type;
1383
1384                 rc = cifs_posix_lock_test(file, flock);
1385                 if (!rc)
1386                         return rc;
1387
1388                 if (type & server->vals->shared_lock_type)
1389                         posix_lock_type = CIFS_RDLCK;
1390                 else
1391                         posix_lock_type = CIFS_WRLCK;
1392                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1393                                       hash_lockowner(flock->fl_owner),
1394                                       flock->fl_start, length, flock,
1395                                       posix_lock_type, wait_flag);
1396                 return rc;
1397         }
1398
1399         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1400         if (!rc)
1401                 return rc;
1402
1403         /* BB we could chain these into one lock request BB */
1404         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1405                                     1, 0, false);
1406         if (rc == 0) {
1407                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1408                                             type, 0, 1, false);
1409                 flock->fl_type = F_UNLCK;
1410                 if (rc != 0)
1411                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1412                                  rc);
1413                 return 0;
1414         }
1415
1416         if (type & server->vals->shared_lock_type) {
1417                 flock->fl_type = F_WRLCK;
1418                 return 0;
1419         }
1420
1421         type &= ~server->vals->exclusive_lock_type;
1422
1423         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1424                                     type | server->vals->shared_lock_type,
1425                                     1, 0, false);
1426         if (rc == 0) {
1427                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1428                         type | server->vals->shared_lock_type, 0, 1, false);
1429                 flock->fl_type = F_RDLCK;
1430                 if (rc != 0)
1431                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1432                                  rc);
1433         } else
1434                 flock->fl_type = F_WRLCK;
1435
1436         return 0;
1437 }
1438
1439 void
1440 cifs_move_llist(struct list_head *source, struct list_head *dest)
1441 {
1442         struct list_head *li, *tmp;
1443         list_for_each_safe(li, tmp, source)
1444                 list_move(li, dest);
1445 }
1446
1447 void
1448 cifs_free_llist(struct list_head *llist)
1449 {
1450         struct cifsLockInfo *li, *tmp;
1451         list_for_each_entry_safe(li, tmp, llist, llist) {
1452                 cifs_del_lock_waiters(li);
1453                 list_del(&li->llist);
1454                 kfree(li);
1455         }
1456 }
1457
1458 int
1459 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1460                   unsigned int xid)
1461 {
1462         int rc = 0, stored_rc;
1463         static const int types[] = {
1464                 LOCKING_ANDX_LARGE_FILES,
1465                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1466         };
1467         unsigned int i;
1468         unsigned int max_num, num, max_buf;
1469         LOCKING_ANDX_RANGE *buf, *cur;
1470         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1471         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1472         struct cifsLockInfo *li, *tmp;
1473         __u64 length = 1 + flock->fl_end - flock->fl_start;
1474         struct list_head tmp_llist;
1475
1476         INIT_LIST_HEAD(&tmp_llist);
1477
1478         /*
1479          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1480          * and check it before using.
1481          */
1482         max_buf = tcon->ses->server->maxBuf;
1483         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1484                 return -EINVAL;
1485
1486         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487                      PAGE_SIZE);
1488         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489                         PAGE_SIZE);
1490         max_num = (max_buf - sizeof(struct smb_hdr)) /
1491                                                 sizeof(LOCKING_ANDX_RANGE);
1492         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1493         if (!buf)
1494                 return -ENOMEM;
1495
1496         down_write(&cinode->lock_sem);
1497         for (i = 0; i < 2; i++) {
1498                 cur = buf;
1499                 num = 0;
1500                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1501                         if (flock->fl_start > li->offset ||
1502                             (flock->fl_start + length) <
1503                             (li->offset + li->length))
1504                                 continue;
1505                         if (current->tgid != li->pid)
1506                                 continue;
1507                         if (types[i] != li->type)
1508                                 continue;
1509                         if (cinode->can_cache_brlcks) {
1510                                 /*
1511                                  * We can cache brlock requests - simply remove
1512                                  * a lock from the file's list.
1513                                  */
1514                                 list_del(&li->llist);
1515                                 cifs_del_lock_waiters(li);
1516                                 kfree(li);
1517                                 continue;
1518                         }
1519                         cur->Pid = cpu_to_le16(li->pid);
1520                         cur->LengthLow = cpu_to_le32((u32)li->length);
1521                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1522                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1523                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1524                         /*
1525                          * We need to save a lock here to let us add it again to
1526                          * the file's list if the unlock range request fails on
1527                          * the server.
1528                          */
1529                         list_move(&li->llist, &tmp_llist);
1530                         if (++num == max_num) {
1531                                 stored_rc = cifs_lockv(xid, tcon,
1532                                                        cfile->fid.netfid,
1533                                                        li->type, num, 0, buf);
1534                                 if (stored_rc) {
1535                                         /*
1536                                          * We failed on the unlock range
1537                                          * request - add all locks from the tmp
1538                                          * list to the head of the file's list.
1539                                          */
1540                                         cifs_move_llist(&tmp_llist,
1541                                                         &cfile->llist->locks);
1542                                         rc = stored_rc;
1543                                 } else
1544                                         /*
1545                                          * The unlock range request succeed -
1546                                          * free the tmp list.
1547                                          */
1548                                         cifs_free_llist(&tmp_llist);
1549                                 cur = buf;
1550                                 num = 0;
1551                         } else
1552                                 cur++;
1553                 }
1554                 if (num) {
1555                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1556                                                types[i], num, 0, buf);
1557                         if (stored_rc) {
1558                                 cifs_move_llist(&tmp_llist,
1559                                                 &cfile->llist->locks);
1560                                 rc = stored_rc;
1561                         } else
1562                                 cifs_free_llist(&tmp_llist);
1563                 }
1564         }
1565
1566         up_write(&cinode->lock_sem);
1567         kfree(buf);
1568         return rc;
1569 }
1570
1571 static int
1572 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1573            bool wait_flag, bool posix_lck, int lock, int unlock,
1574            unsigned int xid)
1575 {
1576         int rc = 0;
1577         __u64 length = 1 + flock->fl_end - flock->fl_start;
1578         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1579         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1580         struct TCP_Server_Info *server = tcon->ses->server;
1581         struct inode *inode = d_inode(cfile->dentry);
1582
1583         if (posix_lck) {
1584                 int posix_lock_type;
1585
1586                 rc = cifs_posix_lock_set(file, flock);
1587                 if (!rc || rc < 0)
1588                         return rc;
1589
1590                 if (type & server->vals->shared_lock_type)
1591                         posix_lock_type = CIFS_RDLCK;
1592                 else
1593                         posix_lock_type = CIFS_WRLCK;
1594
1595                 if (unlock == 1)
1596                         posix_lock_type = CIFS_UNLCK;
1597
1598                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1599                                       hash_lockowner(flock->fl_owner),
1600                                       flock->fl_start, length,
1601                                       NULL, posix_lock_type, wait_flag);
1602                 goto out;
1603         }
1604
1605         if (lock) {
1606                 struct cifsLockInfo *lock;
1607
1608                 lock = cifs_lock_init(flock->fl_start, length, type,
1609                                       flock->fl_flags);
1610                 if (!lock)
1611                         return -ENOMEM;
1612
1613                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1614                 if (rc < 0) {
1615                         kfree(lock);
1616                         return rc;
1617                 }
1618                 if (!rc)
1619                         goto out;
1620
1621                 /*
1622                  * Windows 7 server can delay breaking lease from read to None
1623                  * if we set a byte-range lock on a file - break it explicitly
1624                  * before sending the lock to the server to be sure the next
1625                  * read won't conflict with non-overlapted locks due to
1626                  * pagereading.
1627                  */
1628                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1629                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1630                         cifs_zap_mapping(inode);
1631                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1632                                  inode);
1633                         CIFS_I(inode)->oplock = 0;
1634                 }
1635
1636                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637                                             type, 1, 0, wait_flag);
1638                 if (rc) {
1639                         kfree(lock);
1640                         return rc;
1641                 }
1642
1643                 cifs_lock_add(cfile, lock);
1644         } else if (unlock)
1645                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1646
1647 out:
1648         if (flock->fl_flags & FL_POSIX && !rc)
1649                 rc = locks_lock_file_wait(file, flock);
1650         return rc;
1651 }
1652
1653 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1654 {
1655         int rc, xid;
1656         int lock = 0, unlock = 0;
1657         bool wait_flag = false;
1658         bool posix_lck = false;
1659         struct cifs_sb_info *cifs_sb;
1660         struct cifs_tcon *tcon;
1661         struct cifsInodeInfo *cinode;
1662         struct cifsFileInfo *cfile;
1663         __u16 netfid;
1664         __u32 type;
1665
1666         rc = -EACCES;
1667         xid = get_xid();
1668
1669         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1670                  cmd, flock->fl_flags, flock->fl_type,
1671                  flock->fl_start, flock->fl_end);
1672
1673         cfile = (struct cifsFileInfo *)file->private_data;
1674         tcon = tlink_tcon(cfile->tlink);
1675
1676         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1677                         tcon->ses->server);
1678         cifs_sb = CIFS_FILE_SB(file);
1679         netfid = cfile->fid.netfid;
1680         cinode = CIFS_I(file_inode(file));
1681
1682         if (cap_unix(tcon->ses) &&
1683             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1684             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1685                 posix_lck = true;
1686         /*
1687          * BB add code here to normalize offset and length to account for
1688          * negative length which we can not accept over the wire.
1689          */
1690         if (IS_GETLK(cmd)) {
1691                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1692                 free_xid(xid);
1693                 return rc;
1694         }
1695
1696         if (!lock && !unlock) {
1697                 /*
1698                  * if no lock or unlock then nothing to do since we do not
1699                  * know what it is
1700                  */
1701                 free_xid(xid);
1702                 return -EOPNOTSUPP;
1703         }
1704
1705         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1706                         xid);
1707         free_xid(xid);
1708         return rc;
1709 }
1710
1711 /*
1712  * update the file size (if needed) after a write. Should be called with
1713  * the inode->i_lock held
1714  */
1715 void
1716 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1717                       unsigned int bytes_written)
1718 {
1719         loff_t end_of_write = offset + bytes_written;
1720
1721         if (end_of_write > cifsi->server_eof)
1722                 cifsi->server_eof = end_of_write;
1723 }
1724
1725 static ssize_t
1726 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1727            size_t write_size, loff_t *offset)
1728 {
1729         int rc = 0;
1730         unsigned int bytes_written = 0;
1731         unsigned int total_written;
1732         struct cifs_sb_info *cifs_sb;
1733         struct cifs_tcon *tcon;
1734         struct TCP_Server_Info *server;
1735         unsigned int xid;
1736         struct dentry *dentry = open_file->dentry;
1737         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1738         struct cifs_io_parms io_parms;
1739
1740         cifs_sb = CIFS_SB(dentry->d_sb);
1741
1742         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1743                  write_size, *offset, dentry);
1744
1745         tcon = tlink_tcon(open_file->tlink);
1746         server = tcon->ses->server;
1747
1748         if (!server->ops->sync_write)
1749                 return -ENOSYS;
1750
1751         xid = get_xid();
1752
1753         for (total_written = 0; write_size > total_written;
1754              total_written += bytes_written) {
1755                 rc = -EAGAIN;
1756                 while (rc == -EAGAIN) {
1757                         struct kvec iov[2];
1758                         unsigned int len;
1759
1760                         if (open_file->invalidHandle) {
1761                                 /* we could deadlock if we called
1762                                    filemap_fdatawait from here so tell
1763                                    reopen_file not to flush data to
1764                                    server now */
1765                                 rc = cifs_reopen_file(open_file, false);
1766                                 if (rc != 0)
1767                                         break;
1768                         }
1769
1770                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1771                                   (unsigned int)write_size - total_written);
1772                         /* iov[0] is reserved for smb header */
1773                         iov[1].iov_base = (char *)write_data + total_written;
1774                         iov[1].iov_len = len;
1775                         io_parms.pid = pid;
1776                         io_parms.tcon = tcon;
1777                         io_parms.offset = *offset;
1778                         io_parms.length = len;
1779                         rc = server->ops->sync_write(xid, &open_file->fid,
1780                                         &io_parms, &bytes_written, iov, 1);
1781                 }
1782                 if (rc || (bytes_written == 0)) {
1783                         if (total_written)
1784                                 break;
1785                         else {
1786                                 free_xid(xid);
1787                                 return rc;
1788                         }
1789                 } else {
1790                         spin_lock(&d_inode(dentry)->i_lock);
1791                         cifs_update_eof(cifsi, *offset, bytes_written);
1792                         spin_unlock(&d_inode(dentry)->i_lock);
1793                         *offset += bytes_written;
1794                 }
1795         }
1796
1797         cifs_stats_bytes_written(tcon, total_written);
1798
1799         if (total_written > 0) {
1800                 spin_lock(&d_inode(dentry)->i_lock);
1801                 if (*offset > d_inode(dentry)->i_size)
1802                         i_size_write(d_inode(dentry), *offset);
1803                 spin_unlock(&d_inode(dentry)->i_lock);
1804         }
1805         mark_inode_dirty_sync(d_inode(dentry));
1806         free_xid(xid);
1807         return total_written;
1808 }
1809
1810 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1811                                         bool fsuid_only)
1812 {
1813         struct cifsFileInfo *open_file = NULL;
1814         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1815         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1816
1817         /* only filter by fsuid on multiuser mounts */
1818         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1819                 fsuid_only = false;
1820
1821         spin_lock(&tcon->open_file_lock);
1822         /* we could simply get the first_list_entry since write-only entries
1823            are always at the end of the list but since the first entry might
1824            have a close pending, we go through the whole list */
1825         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1826                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1827                         continue;
1828                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1829                         if (!open_file->invalidHandle) {
1830                                 /* found a good file */
1831                                 /* lock it so it will not be closed on us */
1832                                 cifsFileInfo_get(open_file);
1833                                 spin_unlock(&tcon->open_file_lock);
1834                                 return open_file;
1835                         } /* else might as well continue, and look for
1836                              another, or simply have the caller reopen it
1837                              again rather than trying to fix this handle */
1838                 } else /* write only file */
1839                         break; /* write only files are last so must be done */
1840         }
1841         spin_unlock(&tcon->open_file_lock);
1842         return NULL;
1843 }
1844
1845 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1846                                         bool fsuid_only)
1847 {
1848         struct cifsFileInfo *open_file, *inv_file = NULL;
1849         struct cifs_sb_info *cifs_sb;
1850         struct cifs_tcon *tcon;
1851         bool any_available = false;
1852         int rc;
1853         unsigned int refind = 0;
1854
1855         /* Having a null inode here (because mapping->host was set to zero by
1856         the VFS or MM) should not happen but we had reports of on oops (due to
1857         it being zero) during stress testcases so we need to check for it */
1858
1859         if (cifs_inode == NULL) {
1860                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1861                 dump_stack();
1862                 return NULL;
1863         }
1864
1865         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1866         tcon = cifs_sb_master_tcon(cifs_sb);
1867
1868         /* only filter by fsuid on multiuser mounts */
1869         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1870                 fsuid_only = false;
1871
1872         spin_lock(&tcon->open_file_lock);
1873 refind_writable:
1874         if (refind > MAX_REOPEN_ATT) {
1875                 spin_unlock(&tcon->open_file_lock);
1876                 return NULL;
1877         }
1878         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1879                 if (!any_available && open_file->pid != current->tgid)
1880                         continue;
1881                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1882                         continue;
1883                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1884                         if (!open_file->invalidHandle) {
1885                                 /* found a good writable file */
1886                                 cifsFileInfo_get(open_file);
1887                                 spin_unlock(&tcon->open_file_lock);
1888                                 return open_file;
1889                         } else {
1890                                 if (!inv_file)
1891                                         inv_file = open_file;
1892                         }
1893                 }
1894         }
1895         /* couldn't find useable FH with same pid, try any available */
1896         if (!any_available) {
1897                 any_available = true;
1898                 goto refind_writable;
1899         }
1900
1901         if (inv_file) {
1902                 any_available = false;
1903                 cifsFileInfo_get(inv_file);
1904         }
1905
1906         spin_unlock(&tcon->open_file_lock);
1907
1908         if (inv_file) {
1909                 rc = cifs_reopen_file(inv_file, false);
1910                 if (!rc)
1911                         return inv_file;
1912                 else {
1913                         spin_lock(&tcon->open_file_lock);
1914                         list_move_tail(&inv_file->flist,
1915                                         &cifs_inode->openFileList);
1916                         spin_unlock(&tcon->open_file_lock);
1917                         cifsFileInfo_put(inv_file);
1918                         ++refind;
1919                         inv_file = NULL;
1920                         spin_lock(&tcon->open_file_lock);
1921                         goto refind_writable;
1922                 }
1923         }
1924
1925         return NULL;
1926 }
1927
1928 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1929 {
1930         struct address_space *mapping = page->mapping;
1931         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1932         char *write_data;
1933         int rc = -EFAULT;
1934         int bytes_written = 0;
1935         struct inode *inode;
1936         struct cifsFileInfo *open_file;
1937
1938         if (!mapping || !mapping->host)
1939                 return -EFAULT;
1940
1941         inode = page->mapping->host;
1942
1943         offset += (loff_t)from;
1944         write_data = kmap(page);
1945         write_data += from;
1946
1947         if ((to > PAGE_SIZE) || (from > to)) {
1948                 kunmap(page);
1949                 return -EIO;
1950         }
1951
1952         /* racing with truncate? */
1953         if (offset > mapping->host->i_size) {
1954                 kunmap(page);
1955                 return 0; /* don't care */
1956         }
1957
1958         /* check to make sure that we are not extending the file */
1959         if (mapping->host->i_size - offset < (loff_t)to)
1960                 to = (unsigned)(mapping->host->i_size - offset);
1961
1962         open_file = find_writable_file(CIFS_I(mapping->host), false);
1963         if (open_file) {
1964                 bytes_written = cifs_write(open_file, open_file->pid,
1965                                            write_data, to - from, &offset);
1966                 cifsFileInfo_put(open_file);
1967                 /* Does mm or vfs already set times? */
1968                 inode->i_atime = inode->i_mtime = current_time(inode);
1969                 if ((bytes_written > 0) && (offset))
1970                         rc = 0;
1971                 else if (bytes_written < 0)
1972                         rc = bytes_written;
1973         } else {
1974                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1975                 rc = -EIO;
1976         }
1977
1978         kunmap(page);
1979         return rc;
1980 }
1981
1982 static struct cifs_writedata *
1983 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1984                           pgoff_t end, pgoff_t *index,
1985                           unsigned int *found_pages)
1986 {
1987         struct cifs_writedata *wdata;
1988
1989         wdata = cifs_writedata_alloc((unsigned int)tofind,
1990                                      cifs_writev_complete);
1991         if (!wdata)
1992                 return NULL;
1993
1994         *found_pages = find_get_pages_range_tag(mapping, index, end,
1995                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1996         return wdata;
1997 }
1998
1999 static unsigned int
2000 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2001                     struct address_space *mapping,
2002                     struct writeback_control *wbc,
2003                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2004 {
2005         unsigned int nr_pages = 0, i;
2006         struct page *page;
2007
2008         for (i = 0; i < found_pages; i++) {
2009                 page = wdata->pages[i];
2010                 /*
2011                  * At this point we hold neither the i_pages lock nor the
2012                  * page lock: the page may be truncated or invalidated
2013                  * (changing page->mapping to NULL), or even swizzled
2014                  * back from swapper_space to tmpfs file mapping
2015                  */
2016
2017                 if (nr_pages == 0)
2018                         lock_page(page);
2019                 else if (!trylock_page(page))
2020                         break;
2021
2022                 if (unlikely(page->mapping != mapping)) {
2023                         unlock_page(page);
2024                         break;
2025                 }
2026
2027                 if (!wbc->range_cyclic && page->index > end) {
2028                         *done = true;
2029                         unlock_page(page);
2030                         break;
2031                 }
2032
2033                 if (*next && (page->index != *next)) {
2034                         /* Not next consecutive page */
2035                         unlock_page(page);
2036                         break;
2037                 }
2038
2039                 if (wbc->sync_mode != WB_SYNC_NONE)
2040                         wait_on_page_writeback(page);
2041
2042                 if (PageWriteback(page) ||
2043                                 !clear_page_dirty_for_io(page)) {
2044                         unlock_page(page);
2045                         break;
2046                 }
2047
2048                 /*
2049                  * This actually clears the dirty bit in the radix tree.
2050                  * See cifs_writepage() for more commentary.
2051                  */
2052                 set_page_writeback(page);
2053                 if (page_offset(page) >= i_size_read(mapping->host)) {
2054                         *done = true;
2055                         unlock_page(page);
2056                         end_page_writeback(page);
2057                         break;
2058                 }
2059
2060                 wdata->pages[i] = page;
2061                 *next = page->index + 1;
2062                 ++nr_pages;
2063         }
2064
2065         /* reset index to refind any pages skipped */
2066         if (nr_pages == 0)
2067                 *index = wdata->pages[0]->index + 1;
2068
2069         /* put any pages we aren't going to use */
2070         for (i = nr_pages; i < found_pages; i++) {
2071                 put_page(wdata->pages[i]);
2072                 wdata->pages[i] = NULL;
2073         }
2074
2075         return nr_pages;
2076 }
2077
2078 static int
2079 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2080                  struct address_space *mapping, struct writeback_control *wbc)
2081 {
2082         int rc = 0;
2083         struct TCP_Server_Info *server;
2084         unsigned int i;
2085
2086         wdata->sync_mode = wbc->sync_mode;
2087         wdata->nr_pages = nr_pages;
2088         wdata->offset = page_offset(wdata->pages[0]);
2089         wdata->pagesz = PAGE_SIZE;
2090         wdata->tailsz = min(i_size_read(mapping->host) -
2091                         page_offset(wdata->pages[nr_pages - 1]),
2092                         (loff_t)PAGE_SIZE);
2093         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2094
2095         if (wdata->cfile != NULL)
2096                 cifsFileInfo_put(wdata->cfile);
2097         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2098         if (!wdata->cfile) {
2099                 cifs_dbg(VFS, "No writable handles for inode\n");
2100                 rc = -EBADF;
2101         } else {
2102                 wdata->pid = wdata->cfile->pid;
2103                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2104                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2105         }
2106
2107         for (i = 0; i < nr_pages; ++i)
2108                 unlock_page(wdata->pages[i]);
2109
2110         return rc;
2111 }
2112
2113 static int cifs_writepages(struct address_space *mapping,
2114                            struct writeback_control *wbc)
2115 {
2116         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2117         struct TCP_Server_Info *server;
2118         bool done = false, scanned = false, range_whole = false;
2119         pgoff_t end, index;
2120         struct cifs_writedata *wdata;
2121         int rc = 0;
2122         int saved_rc = 0;
2123         unsigned int xid;
2124
2125         /*
2126          * If wsize is smaller than the page cache size, default to writing
2127          * one page at a time via cifs_writepage
2128          */
2129         if (cifs_sb->wsize < PAGE_SIZE)
2130                 return generic_writepages(mapping, wbc);
2131
2132         xid = get_xid();
2133         if (wbc->range_cyclic) {
2134                 index = mapping->writeback_index; /* Start from prev offset */
2135                 end = -1;
2136         } else {
2137                 index = wbc->range_start >> PAGE_SHIFT;
2138                 end = wbc->range_end >> PAGE_SHIFT;
2139                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2140                         range_whole = true;
2141                 scanned = true;
2142         }
2143         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2144 retry:
2145         while (!done && index <= end) {
2146                 unsigned int i, nr_pages, found_pages, wsize, credits;
2147                 pgoff_t next = 0, tofind, saved_index = index;
2148
2149                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2150                                                    &wsize, &credits);
2151                 if (rc != 0) {
2152                         done = true;
2153                         break;
2154                 }
2155
2156                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2157
2158                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2159                                                   &found_pages);
2160                 if (!wdata) {
2161                         rc = -ENOMEM;
2162                         done = true;
2163                         add_credits_and_wake_if(server, credits, 0);
2164                         break;
2165                 }
2166
2167                 if (found_pages == 0) {
2168                         kref_put(&wdata->refcount, cifs_writedata_release);
2169                         add_credits_and_wake_if(server, credits, 0);
2170                         break;
2171                 }
2172
2173                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2174                                                end, &index, &next, &done);
2175
2176                 /* nothing to write? */
2177                 if (nr_pages == 0) {
2178                         kref_put(&wdata->refcount, cifs_writedata_release);
2179                         add_credits_and_wake_if(server, credits, 0);
2180                         continue;
2181                 }
2182
2183                 wdata->credits = credits;
2184
2185                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2186
2187                 /* send failure -- clean up the mess */
2188                 if (rc != 0) {
2189                         add_credits_and_wake_if(server, wdata->credits, 0);
2190                         for (i = 0; i < nr_pages; ++i) {
2191                                 if (is_retryable_error(rc))
2192                                         redirty_page_for_writepage(wbc,
2193                                                            wdata->pages[i]);
2194                                 else
2195                                         SetPageError(wdata->pages[i]);
2196                                 end_page_writeback(wdata->pages[i]);
2197                                 put_page(wdata->pages[i]);
2198                         }
2199                         if (!is_retryable_error(rc))
2200                                 mapping_set_error(mapping, rc);
2201                 }
2202                 kref_put(&wdata->refcount, cifs_writedata_release);
2203
2204                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2205                         index = saved_index;
2206                         continue;
2207                 }
2208
2209                 /* Return immediately if we received a signal during writing */
2210                 if (is_interrupt_error(rc)) {
2211                         done = true;
2212                         break;
2213                 }
2214
2215                 if (rc != 0 && saved_rc == 0)
2216                         saved_rc = rc;
2217
2218                 wbc->nr_to_write -= nr_pages;
2219                 if (wbc->nr_to_write <= 0)
2220                         done = true;
2221
2222                 index = next;
2223         }
2224
2225         if (!scanned && !done) {
2226                 /*
2227                  * We hit the last page and there is more work to be done: wrap
2228                  * back to the start of the file
2229                  */
2230                 scanned = true;
2231                 index = 0;
2232                 goto retry;
2233         }
2234
2235         if (saved_rc != 0)
2236                 rc = saved_rc;
2237
2238         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2239                 mapping->writeback_index = index;
2240
2241         free_xid(xid);
2242         return rc;
2243 }
2244
2245 static int
2246 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2247 {
2248         int rc;
2249         unsigned int xid;
2250
2251         xid = get_xid();
2252 /* BB add check for wbc flags */
2253         get_page(page);
2254         if (!PageUptodate(page))
2255                 cifs_dbg(FYI, "ppw - page not up to date\n");
2256
2257         /*
2258          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2259          *
2260          * A writepage() implementation always needs to do either this,
2261          * or re-dirty the page with "redirty_page_for_writepage()" in
2262          * the case of a failure.
2263          *
2264          * Just unlocking the page will cause the radix tree tag-bits
2265          * to fail to update with the state of the page correctly.
2266          */
2267         set_page_writeback(page);
2268 retry_write:
2269         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2270         if (is_retryable_error(rc)) {
2271                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2272                         goto retry_write;
2273                 redirty_page_for_writepage(wbc, page);
2274         } else if (rc != 0) {
2275                 SetPageError(page);
2276                 mapping_set_error(page->mapping, rc);
2277         } else {
2278                 SetPageUptodate(page);
2279         }
2280         end_page_writeback(page);
2281         put_page(page);
2282         free_xid(xid);
2283         return rc;
2284 }
2285
2286 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2287 {
2288         int rc = cifs_writepage_locked(page, wbc);
2289         unlock_page(page);
2290         return rc;
2291 }
2292
2293 static int cifs_write_end(struct file *file, struct address_space *mapping,
2294                         loff_t pos, unsigned len, unsigned copied,
2295                         struct page *page, void *fsdata)
2296 {
2297         int rc;
2298         struct inode *inode = mapping->host;
2299         struct cifsFileInfo *cfile = file->private_data;
2300         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2301         __u32 pid;
2302
2303         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2304                 pid = cfile->pid;
2305         else
2306                 pid = current->tgid;
2307
2308         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2309                  page, pos, copied);
2310
2311         if (PageChecked(page)) {
2312                 if (copied == len)
2313                         SetPageUptodate(page);
2314                 ClearPageChecked(page);
2315         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2316                 SetPageUptodate(page);
2317
2318         if (!PageUptodate(page)) {
2319                 char *page_data;
2320                 unsigned offset = pos & (PAGE_SIZE - 1);
2321                 unsigned int xid;
2322
2323                 xid = get_xid();
2324                 /* this is probably better than directly calling
2325                    partialpage_write since in this function the file handle is
2326                    known which we might as well leverage */
2327                 /* BB check if anything else missing out of ppw
2328                    such as updating last write time */
2329                 page_data = kmap(page);
2330                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2331                 /* if (rc < 0) should we set writebehind rc? */
2332                 kunmap(page);
2333
2334                 free_xid(xid);
2335         } else {
2336                 rc = copied;
2337                 pos += copied;
2338                 set_page_dirty(page);
2339         }
2340
2341         if (rc > 0) {
2342                 spin_lock(&inode->i_lock);
2343                 if (pos > inode->i_size)
2344                         i_size_write(inode, pos);
2345                 spin_unlock(&inode->i_lock);
2346         }
2347
2348         unlock_page(page);
2349         put_page(page);
2350
2351         return rc;
2352 }
2353
2354 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2355                       int datasync)
2356 {
2357         unsigned int xid;
2358         int rc = 0;
2359         struct cifs_tcon *tcon;
2360         struct TCP_Server_Info *server;
2361         struct cifsFileInfo *smbfile = file->private_data;
2362         struct inode *inode = file_inode(file);
2363         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2364
2365         rc = file_write_and_wait_range(file, start, end);
2366         if (rc)
2367                 return rc;
2368         inode_lock(inode);
2369
2370         xid = get_xid();
2371
2372         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2373                  file, datasync);
2374
2375         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2376                 rc = cifs_zap_mapping(inode);
2377                 if (rc) {
2378                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2379                         rc = 0; /* don't care about it in fsync */
2380                 }
2381         }
2382
2383         tcon = tlink_tcon(smbfile->tlink);
2384         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2385                 server = tcon->ses->server;
2386                 if (server->ops->flush)
2387                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2388                 else
2389                         rc = -ENOSYS;
2390         }
2391
2392         free_xid(xid);
2393         inode_unlock(inode);
2394         return rc;
2395 }
2396
2397 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2398 {
2399         unsigned int xid;
2400         int rc = 0;
2401         struct cifs_tcon *tcon;
2402         struct TCP_Server_Info *server;
2403         struct cifsFileInfo *smbfile = file->private_data;
2404         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2405         struct inode *inode = file->f_mapping->host;
2406
2407         rc = file_write_and_wait_range(file, start, end);
2408         if (rc)
2409                 return rc;
2410         inode_lock(inode);
2411
2412         xid = get_xid();
2413
2414         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2415                  file, datasync);
2416
2417         tcon = tlink_tcon(smbfile->tlink);
2418         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2419                 server = tcon->ses->server;
2420                 if (server->ops->flush)
2421                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2422                 else
2423                         rc = -ENOSYS;
2424         }
2425
2426         free_xid(xid);
2427         inode_unlock(inode);
2428         return rc;
2429 }
2430
2431 /*
2432  * As file closes, flush all cached write data for this inode checking
2433  * for write behind errors.
2434  */
2435 int cifs_flush(struct file *file, fl_owner_t id)
2436 {
2437         struct inode *inode = file_inode(file);
2438         int rc = 0;
2439
2440         if (file->f_mode & FMODE_WRITE)
2441                 rc = filemap_write_and_wait(inode->i_mapping);
2442
2443         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2444
2445         return rc;
2446 }
2447
2448 static int
2449 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2450 {
2451         int rc = 0;
2452         unsigned long i;
2453
2454         for (i = 0; i < num_pages; i++) {
2455                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2456                 if (!pages[i]) {
2457                         /*
2458                          * save number of pages we have already allocated and
2459                          * return with ENOMEM error
2460                          */
2461                         num_pages = i;
2462                         rc = -ENOMEM;
2463                         break;
2464                 }
2465         }
2466
2467         if (rc) {
2468                 for (i = 0; i < num_pages; i++)
2469                         put_page(pages[i]);
2470         }
2471         return rc;
2472 }
2473
2474 static inline
2475 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2476 {
2477         size_t num_pages;
2478         size_t clen;
2479
2480         clen = min_t(const size_t, len, wsize);
2481         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2482
2483         if (cur_len)
2484                 *cur_len = clen;
2485
2486         return num_pages;
2487 }
2488
2489 static void
2490 cifs_uncached_writedata_release(struct kref *refcount)
2491 {
2492         int i;
2493         struct cifs_writedata *wdata = container_of(refcount,
2494                                         struct cifs_writedata, refcount);
2495
2496         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2497         for (i = 0; i < wdata->nr_pages; i++)
2498                 put_page(wdata->pages[i]);
2499         cifs_writedata_release(refcount);
2500 }
2501
2502 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2503
2504 static void
2505 cifs_uncached_writev_complete(struct work_struct *work)
2506 {
2507         struct cifs_writedata *wdata = container_of(work,
2508                                         struct cifs_writedata, work);
2509         struct inode *inode = d_inode(wdata->cfile->dentry);
2510         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2511
2512         spin_lock(&inode->i_lock);
2513         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2514         if (cifsi->server_eof > inode->i_size)
2515                 i_size_write(inode, cifsi->server_eof);
2516         spin_unlock(&inode->i_lock);
2517
2518         complete(&wdata->done);
2519         collect_uncached_write_data(wdata->ctx);
2520         /* the below call can possibly free the last ref to aio ctx */
2521         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2522 }
2523
2524 static int
2525 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2526                       size_t *len, unsigned long *num_pages)
2527 {
2528         size_t save_len, copied, bytes, cur_len = *len;
2529         unsigned long i, nr_pages = *num_pages;
2530
2531         save_len = cur_len;
2532         for (i = 0; i < nr_pages; i++) {
2533                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2534                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2535                 cur_len -= copied;
2536                 /*
2537                  * If we didn't copy as much as we expected, then that
2538                  * may mean we trod into an unmapped area. Stop copying
2539                  * at that point. On the next pass through the big
2540                  * loop, we'll likely end up getting a zero-length
2541                  * write and bailing out of it.
2542                  */
2543                 if (copied < bytes)
2544                         break;
2545         }
2546         cur_len = save_len - cur_len;
2547         *len = cur_len;
2548
2549         /*
2550          * If we have no data to send, then that probably means that
2551          * the copy above failed altogether. That's most likely because
2552          * the address in the iovec was bogus. Return -EFAULT and let
2553          * the caller free anything we allocated and bail out.
2554          */
2555         if (!cur_len)
2556                 return -EFAULT;
2557
2558         /*
2559          * i + 1 now represents the number of pages we actually used in
2560          * the copy phase above.
2561          */
2562         *num_pages = i + 1;
2563         return 0;
2564 }
2565
2566 static int
2567 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2568         struct cifs_aio_ctx *ctx)
2569 {
2570         unsigned int wsize, credits;
2571         int rc;
2572         struct TCP_Server_Info *server =
2573                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2574
2575         /*
2576          * Wait for credits to resend this wdata.
2577          * Note: we are attempting to resend the whole wdata not in segments
2578          */
2579         do {
2580                 rc = server->ops->wait_mtu_credits(
2581                         server, wdata->bytes, &wsize, &credits);
2582
2583                 if (rc)
2584                         goto out;
2585
2586                 if (wsize < wdata->bytes) {
2587                         add_credits_and_wake_if(server, credits, 0);
2588                         msleep(1000);
2589                 }
2590         } while (wsize < wdata->bytes);
2591
2592         rc = -EAGAIN;
2593         while (rc == -EAGAIN) {
2594                 rc = 0;
2595                 if (wdata->cfile->invalidHandle)
2596                         rc = cifs_reopen_file(wdata->cfile, false);
2597                 if (!rc)
2598                         rc = server->ops->async_writev(wdata,
2599                                         cifs_uncached_writedata_release);
2600         }
2601
2602         if (!rc) {
2603                 list_add_tail(&wdata->list, wdata_list);
2604                 return 0;
2605         }
2606
2607         add_credits_and_wake_if(server, wdata->credits, 0);
2608 out:
2609         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2610
2611         return rc;
2612 }
2613
2614 static int
2615 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2616                      struct cifsFileInfo *open_file,
2617                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2618                      struct cifs_aio_ctx *ctx)
2619 {
2620         int rc = 0;
2621         size_t cur_len;
2622         unsigned long nr_pages, num_pages, i;
2623         struct cifs_writedata *wdata;
2624         struct iov_iter saved_from = *from;
2625         loff_t saved_offset = offset;
2626         pid_t pid;
2627         struct TCP_Server_Info *server;
2628         struct page **pagevec;
2629         size_t start;
2630
2631         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2632                 pid = open_file->pid;
2633         else
2634                 pid = current->tgid;
2635
2636         server = tlink_tcon(open_file->tlink)->ses->server;
2637
2638         do {
2639                 unsigned int wsize, credits;
2640
2641                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2642                                                    &wsize, &credits);
2643                 if (rc)
2644                         break;
2645
2646                 cur_len = min_t(const size_t, len, wsize);
2647
2648                 if (ctx->direct_io) {
2649                         ssize_t result;
2650
2651                         result = iov_iter_get_pages_alloc(
2652                                 from, &pagevec, cur_len, &start);
2653                         if (result < 0) {
2654                                 cifs_dbg(VFS,
2655                                         "direct_writev couldn't get user pages "
2656                                         "(rc=%zd) iter type %d iov_offset %zd "
2657                                         "count %zd\n",
2658                                         result, from->type,
2659                                         from->iov_offset, from->count);
2660                                 dump_stack();
2661
2662                                 rc = result;
2663                                 add_credits_and_wake_if(server, credits, 0);
2664                                 break;
2665                         }
2666                         cur_len = (size_t)result;
2667                         iov_iter_advance(from, cur_len);
2668
2669                         nr_pages =
2670                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2671
2672                         wdata = cifs_writedata_direct_alloc(pagevec,
2673                                              cifs_uncached_writev_complete);
2674                         if (!wdata) {
2675                                 rc = -ENOMEM;
2676                                 add_credits_and_wake_if(server, credits, 0);
2677                                 break;
2678                         }
2679
2680
2681                         wdata->page_offset = start;
2682                         wdata->tailsz =
2683                                 nr_pages > 1 ?
2684                                         cur_len - (PAGE_SIZE - start) -
2685                                         (nr_pages - 2) * PAGE_SIZE :
2686                                         cur_len;
2687                 } else {
2688                         nr_pages = get_numpages(wsize, len, &cur_len);
2689                         wdata = cifs_writedata_alloc(nr_pages,
2690                                              cifs_uncached_writev_complete);
2691                         if (!wdata) {
2692                                 rc = -ENOMEM;
2693                                 add_credits_and_wake_if(server, credits, 0);
2694                                 break;
2695                         }
2696
2697                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2698                         if (rc) {
2699                                 kfree(wdata);
2700                                 add_credits_and_wake_if(server, credits, 0);
2701                                 break;
2702                         }
2703
2704                         num_pages = nr_pages;
2705                         rc = wdata_fill_from_iovec(
2706                                 wdata, from, &cur_len, &num_pages);
2707                         if (rc) {
2708                                 for (i = 0; i < nr_pages; i++)
2709                                         put_page(wdata->pages[i]);
2710                                 kfree(wdata);
2711                                 add_credits_and_wake_if(server, credits, 0);
2712                                 break;
2713                         }
2714
2715                         /*
2716                          * Bring nr_pages down to the number of pages we
2717                          * actually used, and free any pages that we didn't use.
2718                          */
2719                         for ( ; nr_pages > num_pages; nr_pages--)
2720                                 put_page(wdata->pages[nr_pages - 1]);
2721
2722                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2723                 }
2724
2725                 wdata->sync_mode = WB_SYNC_ALL;
2726                 wdata->nr_pages = nr_pages;
2727                 wdata->offset = (__u64)offset;
2728                 wdata->cfile = cifsFileInfo_get(open_file);
2729                 wdata->pid = pid;
2730                 wdata->bytes = cur_len;
2731                 wdata->pagesz = PAGE_SIZE;
2732                 wdata->credits = credits;
2733                 wdata->ctx = ctx;
2734                 kref_get(&ctx->refcount);
2735
2736                 if (!wdata->cfile->invalidHandle ||
2737                     !(rc = cifs_reopen_file(wdata->cfile, false)))
2738                         rc = server->ops->async_writev(wdata,
2739                                         cifs_uncached_writedata_release);
2740                 if (rc) {
2741                         add_credits_and_wake_if(server, wdata->credits, 0);
2742                         kref_put(&wdata->refcount,
2743                                  cifs_uncached_writedata_release);
2744                         if (rc == -EAGAIN) {
2745                                 *from = saved_from;
2746                                 iov_iter_advance(from, offset - saved_offset);
2747                                 continue;
2748                         }
2749                         break;
2750                 }
2751
2752                 list_add_tail(&wdata->list, wdata_list);
2753                 offset += cur_len;
2754                 len -= cur_len;
2755         } while (len > 0);
2756
2757         return rc;
2758 }
2759
2760 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2761 {
2762         struct cifs_writedata *wdata, *tmp;
2763         struct cifs_tcon *tcon;
2764         struct cifs_sb_info *cifs_sb;
2765         struct dentry *dentry = ctx->cfile->dentry;
2766         unsigned int i;
2767         int rc;
2768
2769         tcon = tlink_tcon(ctx->cfile->tlink);
2770         cifs_sb = CIFS_SB(dentry->d_sb);
2771
2772         mutex_lock(&ctx->aio_mutex);
2773
2774         if (list_empty(&ctx->list)) {
2775                 mutex_unlock(&ctx->aio_mutex);
2776                 return;
2777         }
2778
2779         rc = ctx->rc;
2780         /*
2781          * Wait for and collect replies for any successful sends in order of
2782          * increasing offset. Once an error is hit, then return without waiting
2783          * for any more replies.
2784          */
2785 restart_loop:
2786         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2787                 if (!rc) {
2788                         if (!try_wait_for_completion(&wdata->done)) {
2789                                 mutex_unlock(&ctx->aio_mutex);
2790                                 return;
2791                         }
2792
2793                         if (wdata->result)
2794                                 rc = wdata->result;
2795                         else
2796                                 ctx->total_len += wdata->bytes;
2797
2798                         /* resend call if it's a retryable error */
2799                         if (rc == -EAGAIN) {
2800                                 struct list_head tmp_list;
2801                                 struct iov_iter tmp_from = ctx->iter;
2802
2803                                 INIT_LIST_HEAD(&tmp_list);
2804                                 list_del_init(&wdata->list);
2805
2806                                 if (ctx->direct_io)
2807                                         rc = cifs_resend_wdata(
2808                                                 wdata, &tmp_list, ctx);
2809                                 else {
2810                                         iov_iter_advance(&tmp_from,
2811                                                  wdata->offset - ctx->pos);
2812
2813                                         rc = cifs_write_from_iter(wdata->offset,
2814                                                 wdata->bytes, &tmp_from,
2815                                                 ctx->cfile, cifs_sb, &tmp_list,
2816                                                 ctx);
2817                                 }
2818
2819                                 list_splice(&tmp_list, &ctx->list);
2820
2821                                 kref_put(&wdata->refcount,
2822                                          cifs_uncached_writedata_release);
2823                                 goto restart_loop;
2824                         }
2825                 }
2826                 list_del_init(&wdata->list);
2827                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2828         }
2829
2830         if (!ctx->direct_io)
2831                 for (i = 0; i < ctx->npages; i++)
2832                         put_page(ctx->bv[i].bv_page);
2833
2834         cifs_stats_bytes_written(tcon, ctx->total_len);
2835         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2836
2837         ctx->rc = (rc == 0) ? ctx->total_len : rc;
2838
2839         mutex_unlock(&ctx->aio_mutex);
2840
2841         if (ctx->iocb && ctx->iocb->ki_complete)
2842                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2843         else
2844                 complete(&ctx->done);
2845 }
2846
2847 static ssize_t __cifs_writev(
2848         struct kiocb *iocb, struct iov_iter *from, bool direct)
2849 {
2850         struct file *file = iocb->ki_filp;
2851         ssize_t total_written = 0;
2852         struct cifsFileInfo *cfile;
2853         struct cifs_tcon *tcon;
2854         struct cifs_sb_info *cifs_sb;
2855         struct cifs_aio_ctx *ctx;
2856         struct iov_iter saved_from = *from;
2857         size_t len = iov_iter_count(from);
2858         int rc;
2859
2860         /*
2861          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2862          * In this case, fall back to non-direct write function.
2863          * this could be improved by getting pages directly in ITER_KVEC
2864          */
2865         if (direct && from->type & ITER_KVEC) {
2866                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2867                 direct = false;
2868         }
2869
2870         rc = generic_write_checks(iocb, from);
2871         if (rc <= 0)
2872                 return rc;
2873
2874         cifs_sb = CIFS_FILE_SB(file);
2875         cfile = file->private_data;
2876         tcon = tlink_tcon(cfile->tlink);
2877
2878         if (!tcon->ses->server->ops->async_writev)
2879                 return -ENOSYS;
2880
2881         ctx = cifs_aio_ctx_alloc();
2882         if (!ctx)
2883                 return -ENOMEM;
2884
2885         ctx->cfile = cifsFileInfo_get(cfile);
2886
2887         if (!is_sync_kiocb(iocb))
2888                 ctx->iocb = iocb;
2889
2890         ctx->pos = iocb->ki_pos;
2891
2892         if (direct) {
2893                 ctx->direct_io = true;
2894                 ctx->iter = *from;
2895                 ctx->len = len;
2896         } else {
2897                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2898                 if (rc) {
2899                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2900                         return rc;
2901                 }
2902         }
2903
2904         /* grab a lock here due to read response handlers can access ctx */
2905         mutex_lock(&ctx->aio_mutex);
2906
2907         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2908                                   cfile, cifs_sb, &ctx->list, ctx);
2909
2910         /*
2911          * If at least one write was successfully sent, then discard any rc
2912          * value from the later writes. If the other write succeeds, then
2913          * we'll end up returning whatever was written. If it fails, then
2914          * we'll get a new rc value from that.
2915          */
2916         if (!list_empty(&ctx->list))
2917                 rc = 0;
2918
2919         mutex_unlock(&ctx->aio_mutex);
2920
2921         if (rc) {
2922                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2923                 return rc;
2924         }
2925
2926         if (!is_sync_kiocb(iocb)) {
2927                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2928                 return -EIOCBQUEUED;
2929         }
2930
2931         rc = wait_for_completion_killable(&ctx->done);
2932         if (rc) {
2933                 mutex_lock(&ctx->aio_mutex);
2934                 ctx->rc = rc = -EINTR;
2935                 total_written = ctx->total_len;
2936                 mutex_unlock(&ctx->aio_mutex);
2937         } else {
2938                 rc = ctx->rc;
2939                 total_written = ctx->total_len;
2940         }
2941
2942         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2943
2944         if (unlikely(!total_written))
2945                 return rc;
2946
2947         iocb->ki_pos += total_written;
2948         return total_written;
2949 }
2950
2951 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
2952 {
2953         return __cifs_writev(iocb, from, true);
2954 }
2955
2956 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2957 {
2958         return __cifs_writev(iocb, from, false);
2959 }
2960
2961 static ssize_t
2962 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2963 {
2964         struct file *file = iocb->ki_filp;
2965         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2966         struct inode *inode = file->f_mapping->host;
2967         struct cifsInodeInfo *cinode = CIFS_I(inode);
2968         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2969         ssize_t rc;
2970
2971         inode_lock(inode);
2972         /*
2973          * We need to hold the sem to be sure nobody modifies lock list
2974          * with a brlock that prevents writing.
2975          */
2976         down_read(&cinode->lock_sem);
2977
2978         rc = generic_write_checks(iocb, from);
2979         if (rc <= 0)
2980                 goto out;
2981
2982         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2983                                      server->vals->exclusive_lock_type, 0,
2984                                      NULL, CIFS_WRITE_OP))
2985                 rc = __generic_file_write_iter(iocb, from);
2986         else
2987                 rc = -EACCES;
2988 out:
2989         up_read(&cinode->lock_sem);
2990         inode_unlock(inode);
2991
2992         if (rc > 0)
2993                 rc = generic_write_sync(iocb, rc);
2994         return rc;
2995 }
2996
2997 ssize_t
2998 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2999 {
3000         struct inode *inode = file_inode(iocb->ki_filp);
3001         struct cifsInodeInfo *cinode = CIFS_I(inode);
3002         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3003         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3004                                                 iocb->ki_filp->private_data;
3005         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3006         ssize_t written;
3007
3008         written = cifs_get_writer(cinode);
3009         if (written)
3010                 return written;
3011
3012         if (CIFS_CACHE_WRITE(cinode)) {
3013                 if (cap_unix(tcon->ses) &&
3014                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3015                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3016                         written = generic_file_write_iter(iocb, from);
3017                         goto out;
3018                 }
3019                 written = cifs_writev(iocb, from);
3020                 goto out;
3021         }
3022         /*
3023          * For non-oplocked files in strict cache mode we need to write the data
3024          * to the server exactly from the pos to pos+len-1 rather than flush all
3025          * affected pages because it may cause a error with mandatory locks on
3026          * these pages but not on the region from pos to ppos+len-1.
3027          */
3028         written = cifs_user_writev(iocb, from);
3029         if (written > 0 && CIFS_CACHE_READ(cinode)) {
3030                 /*
3031                  * Windows 7 server can delay breaking level2 oplock if a write
3032                  * request comes - break it on the client to prevent reading
3033                  * an old data.
3034                  */
3035                 cifs_zap_mapping(inode);
3036                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
3037                          inode);
3038                 cinode->oplock = 0;
3039         }
3040 out:
3041         cifs_put_writer(cinode);
3042         return written;
3043 }
3044
3045 static struct cifs_readdata *
3046 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3047 {
3048         struct cifs_readdata *rdata;
3049
3050         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3051         if (rdata != NULL) {
3052                 rdata->pages = pages;
3053                 kref_init(&rdata->refcount);
3054                 INIT_LIST_HEAD(&rdata->list);
3055                 init_completion(&rdata->done);
3056                 INIT_WORK(&rdata->work, complete);
3057         }
3058
3059         return rdata;
3060 }
3061
3062 static struct cifs_readdata *
3063 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3064 {
3065         struct page **pages =
3066                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3067         struct cifs_readdata *ret = NULL;
3068
3069         if (pages) {
3070                 ret = cifs_readdata_direct_alloc(pages, complete);
3071                 if (!ret)
3072                         kfree(pages);
3073         }
3074
3075         return ret;
3076 }
3077
3078 void
3079 cifs_readdata_release(struct kref *refcount)
3080 {
3081         struct cifs_readdata *rdata = container_of(refcount,
3082                                         struct cifs_readdata, refcount);
3083 #ifdef CONFIG_CIFS_SMB_DIRECT
3084         if (rdata->mr) {
3085                 smbd_deregister_mr(rdata->mr);
3086                 rdata->mr = NULL;
3087         }
3088 #endif
3089         if (rdata->cfile)
3090                 cifsFileInfo_put(rdata->cfile);
3091
3092         kvfree(rdata->pages);
3093         kfree(rdata);
3094 }
3095
3096 static int
3097 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3098 {
3099         int rc = 0;
3100         struct page *page;
3101         unsigned int i;
3102
3103         for (i = 0; i < nr_pages; i++) {
3104                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3105                 if (!page) {
3106                         rc = -ENOMEM;
3107                         break;
3108                 }
3109                 rdata->pages[i] = page;
3110         }
3111
3112         if (rc) {
3113                 for (i = 0; i < nr_pages; i++) {
3114                         put_page(rdata->pages[i]);
3115                         rdata->pages[i] = NULL;
3116                 }
3117         }
3118         return rc;
3119 }
3120
3121 static void
3122 cifs_uncached_readdata_release(struct kref *refcount)
3123 {
3124         struct cifs_readdata *rdata = container_of(refcount,
3125                                         struct cifs_readdata, refcount);
3126         unsigned int i;
3127
3128         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3129         for (i = 0; i < rdata->nr_pages; i++) {
3130                 put_page(rdata->pages[i]);
3131         }
3132         cifs_readdata_release(refcount);
3133 }
3134
3135 /**
3136  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3137  * @rdata:      the readdata response with list of pages holding data
3138  * @iter:       destination for our data
3139  *
3140  * This function copies data from a list of pages in a readdata response into
3141  * an array of iovecs. It will first calculate where the data should go
3142  * based on the info in the readdata and then copy the data into that spot.
3143  */
3144 static int
3145 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3146 {
3147         size_t remaining = rdata->got_bytes;
3148         unsigned int i;
3149
3150         for (i = 0; i < rdata->nr_pages; i++) {
3151                 struct page *page = rdata->pages[i];
3152                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3153                 size_t written;
3154
3155                 if (unlikely(iov_iter_is_pipe(iter))) {
3156                         void *addr = kmap_atomic(page);
3157
3158                         written = copy_to_iter(addr, copy, iter);
3159                         kunmap_atomic(addr);
3160                 } else
3161                         written = copy_page_to_iter(page, 0, copy, iter);
3162                 remaining -= written;
3163                 if (written < copy && iov_iter_count(iter) > 0)
3164                         break;
3165         }
3166         return remaining ? -EFAULT : 0;
3167 }
3168
3169 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3170
3171 static void
3172 cifs_uncached_readv_complete(struct work_struct *work)
3173 {
3174         struct cifs_readdata *rdata = container_of(work,
3175                                                 struct cifs_readdata, work);
3176
3177         complete(&rdata->done);
3178         collect_uncached_read_data(rdata->ctx);
3179         /* the below call can possibly free the last ref to aio ctx */
3180         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3181 }
3182
3183 static int
3184 uncached_fill_pages(struct TCP_Server_Info *server,
3185                     struct cifs_readdata *rdata, struct iov_iter *iter,
3186                     unsigned int len)
3187 {
3188         int result = 0;
3189         unsigned int i;
3190         unsigned int nr_pages = rdata->nr_pages;
3191         unsigned int page_offset = rdata->page_offset;
3192
3193         rdata->got_bytes = 0;
3194         rdata->tailsz = PAGE_SIZE;
3195         for (i = 0; i < nr_pages; i++) {
3196                 struct page *page = rdata->pages[i];
3197                 size_t n;
3198                 unsigned int segment_size = rdata->pagesz;
3199
3200                 if (i == 0)
3201                         segment_size -= page_offset;
3202                 else
3203                         page_offset = 0;
3204
3205
3206                 if (len <= 0) {
3207                         /* no need to hold page hostage */
3208                         rdata->pages[i] = NULL;
3209                         rdata->nr_pages--;
3210                         put_page(page);
3211                         continue;
3212                 }
3213
3214                 n = len;
3215                 if (len >= segment_size)
3216                         /* enough data to fill the page */
3217                         n = segment_size;
3218                 else
3219                         rdata->tailsz = len;
3220                 len -= n;
3221
3222                 if (iter)
3223                         result = copy_page_from_iter(
3224                                         page, page_offset, n, iter);
3225 #ifdef CONFIG_CIFS_SMB_DIRECT
3226                 else if (rdata->mr)
3227                         result = n;
3228 #endif
3229                 else
3230                         result = cifs_read_page_from_socket(
3231                                         server, page, page_offset, n);
3232                 if (result < 0)
3233                         break;
3234
3235                 rdata->got_bytes += result;
3236         }
3237
3238         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3239                                                 rdata->got_bytes : result;
3240 }
3241
3242 static int
3243 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3244                               struct cifs_readdata *rdata, unsigned int len)
3245 {
3246         return uncached_fill_pages(server, rdata, NULL, len);
3247 }
3248
3249 static int
3250 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3251                               struct cifs_readdata *rdata,
3252                               struct iov_iter *iter)
3253 {
3254         return uncached_fill_pages(server, rdata, iter, iter->count);
3255 }
3256
3257 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3258                         struct list_head *rdata_list,
3259                         struct cifs_aio_ctx *ctx)
3260 {
3261         unsigned int rsize, credits;
3262         int rc;
3263         struct TCP_Server_Info *server =
3264                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3265
3266         /*
3267          * Wait for credits to resend this rdata.
3268          * Note: we are attempting to resend the whole rdata not in segments
3269          */
3270         do {
3271                 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3272                                                 &rsize, &credits);
3273
3274                 if (rc)
3275                         goto out;
3276
3277                 if (rsize < rdata->bytes) {
3278                         add_credits_and_wake_if(server, credits, 0);
3279                         msleep(1000);
3280                 }
3281         } while (rsize < rdata->bytes);
3282
3283         rc = -EAGAIN;
3284         while (rc == -EAGAIN) {
3285                 rc = 0;
3286                 if (rdata->cfile->invalidHandle)
3287                         rc = cifs_reopen_file(rdata->cfile, true);
3288                 if (!rc)
3289                         rc = server->ops->async_readv(rdata);
3290         }
3291
3292         if (!rc) {
3293                 /* Add to aio pending list */
3294                 list_add_tail(&rdata->list, rdata_list);
3295                 return 0;
3296         }
3297
3298         add_credits_and_wake_if(server, rdata->credits, 0);
3299 out:
3300         kref_put(&rdata->refcount,
3301                 cifs_uncached_readdata_release);
3302
3303         return rc;
3304 }
3305
3306 static int
3307 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3308                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3309                      struct cifs_aio_ctx *ctx)
3310 {
3311         struct cifs_readdata *rdata;
3312         unsigned int npages, rsize, credits;
3313         size_t cur_len;
3314         int rc;
3315         pid_t pid;
3316         struct TCP_Server_Info *server;
3317         struct page **pagevec;
3318         size_t start;
3319         struct iov_iter direct_iov = ctx->iter;
3320
3321         server = tlink_tcon(open_file->tlink)->ses->server;
3322
3323         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3324                 pid = open_file->pid;
3325         else
3326                 pid = current->tgid;
3327
3328         if (ctx->direct_io)
3329                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3330
3331         do {
3332                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3333                                                    &rsize, &credits);
3334                 if (rc)
3335                         break;
3336
3337                 cur_len = min_t(const size_t, len, rsize);
3338
3339                 if (ctx->direct_io) {
3340                         ssize_t result;
3341
3342                         result = iov_iter_get_pages_alloc(
3343                                         &direct_iov, &pagevec,
3344                                         cur_len, &start);
3345                         if (result < 0) {
3346                                 cifs_dbg(VFS,
3347                                         "couldn't get user pages (rc=%zd)"
3348                                         " iter type %d"
3349                                         " iov_offset %zd count %zd\n",
3350                                         result, direct_iov.type,
3351                                         direct_iov.iov_offset,
3352                                         direct_iov.count);
3353                                 dump_stack();
3354
3355                                 rc = result;
3356                                 add_credits_and_wake_if(server, credits, 0);
3357                                 break;
3358                         }
3359                         cur_len = (size_t)result;
3360                         iov_iter_advance(&direct_iov, cur_len);
3361
3362                         rdata = cifs_readdata_direct_alloc(
3363                                         pagevec, cifs_uncached_readv_complete);
3364                         if (!rdata) {
3365                                 add_credits_and_wake_if(server, credits, 0);
3366                                 rc = -ENOMEM;
3367                                 break;
3368                         }
3369
3370                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3371                         rdata->page_offset = start;
3372                         rdata->tailsz = npages > 1 ?
3373                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3374                                 cur_len;
3375
3376                 } else {
3377
3378                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3379                         /* allocate a readdata struct */
3380                         rdata = cifs_readdata_alloc(npages,
3381                                             cifs_uncached_readv_complete);
3382                         if (!rdata) {
3383                                 add_credits_and_wake_if(server, credits, 0);
3384                                 rc = -ENOMEM;
3385                                 break;
3386                         }
3387
3388                         rc = cifs_read_allocate_pages(rdata, npages);
3389                         if (rc)
3390                                 goto error;
3391
3392                         rdata->tailsz = PAGE_SIZE;
3393                 }
3394
3395                 rdata->cfile = cifsFileInfo_get(open_file);
3396                 rdata->nr_pages = npages;
3397                 rdata->offset = offset;
3398                 rdata->bytes = cur_len;
3399                 rdata->pid = pid;
3400                 rdata->pagesz = PAGE_SIZE;
3401                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3402                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3403                 rdata->credits = credits;
3404                 rdata->ctx = ctx;
3405                 kref_get(&ctx->refcount);
3406
3407                 if (!rdata->cfile->invalidHandle ||
3408                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3409                         rc = server->ops->async_readv(rdata);
3410 error:
3411                 if (rc) {
3412                         add_credits_and_wake_if(server, rdata->credits, 0);
3413                         kref_put(&rdata->refcount,
3414                                 cifs_uncached_readdata_release);
3415                         if (rc == -EAGAIN) {
3416                                 iov_iter_revert(&direct_iov, cur_len);
3417                                 continue;
3418                         }
3419                         break;
3420                 }
3421
3422                 list_add_tail(&rdata->list, rdata_list);
3423                 offset += cur_len;
3424                 len -= cur_len;
3425         } while (len > 0);
3426
3427         return rc;
3428 }
3429
3430 static void
3431 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3432 {
3433         struct cifs_readdata *rdata, *tmp;
3434         struct iov_iter *to = &ctx->iter;
3435         struct cifs_sb_info *cifs_sb;
3436         struct cifs_tcon *tcon;
3437         unsigned int i;
3438         int rc;
3439
3440         tcon = tlink_tcon(ctx->cfile->tlink);
3441         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3442
3443         mutex_lock(&ctx->aio_mutex);
3444
3445         if (list_empty(&ctx->list)) {
3446                 mutex_unlock(&ctx->aio_mutex);
3447                 return;
3448         }
3449
3450         rc = ctx->rc;
3451         /* the loop below should proceed in the order of increasing offsets */
3452 again:
3453         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3454                 if (!rc) {
3455                         if (!try_wait_for_completion(&rdata->done)) {
3456                                 mutex_unlock(&ctx->aio_mutex);
3457                                 return;
3458                         }
3459
3460                         if (rdata->result == -EAGAIN) {
3461                                 /* resend call if it's a retryable error */
3462                                 struct list_head tmp_list;
3463                                 unsigned int got_bytes = rdata->got_bytes;
3464
3465                                 list_del_init(&rdata->list);
3466                                 INIT_LIST_HEAD(&tmp_list);
3467
3468                                 /*
3469                                  * Got a part of data and then reconnect has
3470                                  * happened -- fill the buffer and continue
3471                                  * reading.
3472                                  */
3473                                 if (got_bytes && got_bytes < rdata->bytes) {
3474                                         rc = 0;
3475                                         if (!ctx->direct_io)
3476                                                 rc = cifs_readdata_to_iov(rdata, to);
3477                                         if (rc) {
3478                                                 kref_put(&rdata->refcount,
3479                                                         cifs_uncached_readdata_release);
3480                                                 continue;
3481                                         }
3482                                 }
3483
3484                                 if (ctx->direct_io) {
3485                                         /*
3486                                          * Re-use rdata as this is a
3487                                          * direct I/O
3488                                          */
3489                                         rc = cifs_resend_rdata(
3490                                                 rdata,
3491                                                 &tmp_list, ctx);
3492                                 } else {
3493                                         rc = cifs_send_async_read(
3494                                                 rdata->offset + got_bytes,
3495                                                 rdata->bytes - got_bytes,
3496                                                 rdata->cfile, cifs_sb,
3497                                                 &tmp_list, ctx);
3498
3499                                         kref_put(&rdata->refcount,
3500                                                 cifs_uncached_readdata_release);
3501                                 }
3502
3503                                 list_splice(&tmp_list, &ctx->list);
3504
3505                                 goto again;
3506                         } else if (rdata->result)
3507                                 rc = rdata->result;
3508                         else if (!ctx->direct_io)
3509                                 rc = cifs_readdata_to_iov(rdata, to);
3510
3511                         /* if there was a short read -- discard anything left */
3512                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3513                                 rc = -ENODATA;
3514
3515                         ctx->total_len += rdata->got_bytes;
3516                 }
3517                 list_del_init(&rdata->list);
3518                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3519         }
3520
3521         if (!ctx->direct_io) {
3522                 for (i = 0; i < ctx->npages; i++) {
3523                         if (ctx->should_dirty)
3524                                 set_page_dirty(ctx->bv[i].bv_page);
3525                         put_page(ctx->bv[i].bv_page);
3526                 }
3527
3528                 ctx->total_len = ctx->len - iov_iter_count(to);
3529         }
3530
3531         cifs_stats_bytes_read(tcon, ctx->total_len);
3532
3533         /* mask nodata case */
3534         if (rc == -ENODATA)
3535                 rc = 0;
3536
3537         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3538
3539         mutex_unlock(&ctx->aio_mutex);
3540
3541         if (ctx->iocb && ctx->iocb->ki_complete)
3542                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3543         else
3544                 complete(&ctx->done);
3545 }
3546
3547 static ssize_t __cifs_readv(
3548         struct kiocb *iocb, struct iov_iter *to, bool direct)
3549 {
3550         size_t len;
3551         struct file *file = iocb->ki_filp;
3552         struct cifs_sb_info *cifs_sb;
3553         struct cifsFileInfo *cfile;
3554         struct cifs_tcon *tcon;
3555         ssize_t rc, total_read = 0;
3556         loff_t offset = iocb->ki_pos;
3557         struct cifs_aio_ctx *ctx;
3558
3559         /*
3560          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3561          * fall back to data copy read path
3562          * this could be improved by getting pages directly in ITER_KVEC
3563          */
3564         if (direct && to->type & ITER_KVEC) {
3565                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3566                 direct = false;
3567         }
3568
3569         len = iov_iter_count(to);
3570         if (!len)
3571                 return 0;
3572
3573         cifs_sb = CIFS_FILE_SB(file);
3574         cfile = file->private_data;
3575         tcon = tlink_tcon(cfile->tlink);
3576
3577         if (!tcon->ses->server->ops->async_readv)
3578                 return -ENOSYS;
3579
3580         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3581                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3582
3583         ctx = cifs_aio_ctx_alloc();
3584         if (!ctx)
3585                 return -ENOMEM;
3586
3587         ctx->cfile = cifsFileInfo_get(cfile);
3588
3589         if (!is_sync_kiocb(iocb))
3590                 ctx->iocb = iocb;
3591
3592         if (iter_is_iovec(to))
3593                 ctx->should_dirty = true;
3594
3595         if (direct) {
3596                 ctx->pos = offset;
3597                 ctx->direct_io = true;
3598                 ctx->iter = *to;
3599                 ctx->len = len;
3600         } else {
3601                 rc = setup_aio_ctx_iter(ctx, to, READ);
3602                 if (rc) {
3603                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3604                         return rc;
3605                 }
3606                 len = ctx->len;
3607         }
3608
3609         /* grab a lock here due to read response handlers can access ctx */
3610         mutex_lock(&ctx->aio_mutex);
3611
3612         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3613
3614         /* if at least one read request send succeeded, then reset rc */
3615         if (!list_empty(&ctx->list))
3616                 rc = 0;
3617
3618         mutex_unlock(&ctx->aio_mutex);
3619
3620         if (rc) {
3621                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3622                 return rc;
3623         }
3624
3625         if (!is_sync_kiocb(iocb)) {
3626                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3627                 return -EIOCBQUEUED;
3628         }
3629
3630         rc = wait_for_completion_killable(&ctx->done);
3631         if (rc) {
3632                 mutex_lock(&ctx->aio_mutex);
3633                 ctx->rc = rc = -EINTR;
3634                 total_read = ctx->total_len;
3635                 mutex_unlock(&ctx->aio_mutex);
3636         } else {
3637                 rc = ctx->rc;
3638                 total_read = ctx->total_len;
3639         }
3640
3641         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3642
3643         if (total_read) {
3644                 iocb->ki_pos += total_read;
3645                 return total_read;
3646         }
3647         return rc;
3648 }
3649
3650 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3651 {
3652         return __cifs_readv(iocb, to, true);
3653 }
3654
3655 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3656 {
3657         return __cifs_readv(iocb, to, false);
3658 }
3659
3660 ssize_t
3661 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3662 {
3663         struct inode *inode = file_inode(iocb->ki_filp);
3664         struct cifsInodeInfo *cinode = CIFS_I(inode);
3665         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3666         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3667                                                 iocb->ki_filp->private_data;
3668         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3669         int rc = -EACCES;
3670
3671         /*
3672          * In strict cache mode we need to read from the server all the time
3673          * if we don't have level II oplock because the server can delay mtime
3674          * change - so we can't make a decision about inode invalidating.
3675          * And we can also fail with pagereading if there are mandatory locks
3676          * on pages affected by this read but not on the region from pos to
3677          * pos+len-1.
3678          */
3679         if (!CIFS_CACHE_READ(cinode))
3680                 return cifs_user_readv(iocb, to);
3681
3682         if (cap_unix(tcon->ses) &&
3683             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3684             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3685                 return generic_file_read_iter(iocb, to);
3686
3687         /*
3688          * We need to hold the sem to be sure nobody modifies lock list
3689          * with a brlock that prevents reading.
3690          */
3691         down_read(&cinode->lock_sem);
3692         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3693                                      tcon->ses->server->vals->shared_lock_type,
3694                                      0, NULL, CIFS_READ_OP))
3695                 rc = generic_file_read_iter(iocb, to);
3696         up_read(&cinode->lock_sem);
3697         return rc;
3698 }
3699
3700 static ssize_t
3701 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3702 {
3703         int rc = -EACCES;
3704         unsigned int bytes_read = 0;
3705         unsigned int total_read;
3706         unsigned int current_read_size;
3707         unsigned int rsize;
3708         struct cifs_sb_info *cifs_sb;
3709         struct cifs_tcon *tcon;
3710         struct TCP_Server_Info *server;
3711         unsigned int xid;
3712         char *cur_offset;
3713         struct cifsFileInfo *open_file;
3714         struct cifs_io_parms io_parms;
3715         int buf_type = CIFS_NO_BUFFER;
3716         __u32 pid;
3717
3718         xid = get_xid();
3719         cifs_sb = CIFS_FILE_SB(file);
3720
3721         /* FIXME: set up handlers for larger reads and/or convert to async */
3722         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3723
3724         if (file->private_data == NULL) {
3725                 rc = -EBADF;
3726                 free_xid(xid);
3727                 return rc;
3728         }
3729         open_file = file->private_data;
3730         tcon = tlink_tcon(open_file->tlink);
3731         server = tcon->ses->server;
3732
3733         if (!server->ops->sync_read) {
3734                 free_xid(xid);
3735                 return -ENOSYS;
3736         }
3737
3738         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3739                 pid = open_file->pid;
3740         else
3741                 pid = current->tgid;
3742
3743         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3744                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3745
3746         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3747              total_read += bytes_read, cur_offset += bytes_read) {
3748                 do {
3749                         current_read_size = min_t(uint, read_size - total_read,
3750                                                   rsize);
3751                         /*
3752                          * For windows me and 9x we do not want to request more
3753                          * than it negotiated since it will refuse the read
3754                          * then.
3755                          */
3756                         if ((tcon->ses) && !(tcon->ses->capabilities &
3757                                 tcon->ses->server->vals->cap_large_files)) {
3758                                 current_read_size = min_t(uint,
3759                                         current_read_size, CIFSMaxBufSize);
3760                         }
3761                         if (open_file->invalidHandle) {
3762                                 rc = cifs_reopen_file(open_file, true);
3763                                 if (rc != 0)
3764                                         break;
3765                         }
3766                         io_parms.pid = pid;
3767                         io_parms.tcon = tcon;
3768                         io_parms.offset = *offset;
3769                         io_parms.length = current_read_size;
3770                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3771                                                     &bytes_read, &cur_offset,
3772                                                     &buf_type);
3773                 } while (rc == -EAGAIN);
3774
3775                 if (rc || (bytes_read == 0)) {
3776                         if (total_read) {
3777                                 break;
3778                         } else {
3779                                 free_xid(xid);
3780                                 return rc;
3781                         }
3782                 } else {
3783                         cifs_stats_bytes_read(tcon, total_read);
3784                         *offset += bytes_read;
3785                 }
3786         }
3787         free_xid(xid);
3788         return total_read;
3789 }
3790
3791 /*
3792  * If the page is mmap'ed into a process' page tables, then we need to make
3793  * sure that it doesn't change while being written back.
3794  */
3795 static vm_fault_t
3796 cifs_page_mkwrite(struct vm_fault *vmf)
3797 {
3798         struct page *page = vmf->page;
3799
3800         lock_page(page);
3801         return VM_FAULT_LOCKED;
3802 }
3803
3804 static const struct vm_operations_struct cifs_file_vm_ops = {
3805         .fault = filemap_fault,
3806         .map_pages = filemap_map_pages,
3807         .page_mkwrite = cifs_page_mkwrite,
3808 };
3809
3810 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3811 {
3812         int xid, rc = 0;
3813         struct inode *inode = file_inode(file);
3814
3815         xid = get_xid();
3816
3817         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3818                 rc = cifs_zap_mapping(inode);
3819         if (!rc)
3820                 rc = generic_file_mmap(file, vma);
3821         if (!rc)
3822                 vma->vm_ops = &cifs_file_vm_ops;
3823
3824         free_xid(xid);
3825         return rc;
3826 }
3827
3828 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3829 {
3830         int rc, xid;
3831
3832         xid = get_xid();
3833
3834         rc = cifs_revalidate_file(file);
3835         if (rc)
3836                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3837                          rc);
3838         if (!rc)
3839                 rc = generic_file_mmap(file, vma);
3840         if (!rc)
3841                 vma->vm_ops = &cifs_file_vm_ops;
3842
3843         free_xid(xid);
3844         return rc;
3845 }
3846
3847 static void
3848 cifs_readv_complete(struct work_struct *work)
3849 {
3850         unsigned int i, got_bytes;
3851         struct cifs_readdata *rdata = container_of(work,
3852                                                 struct cifs_readdata, work);
3853
3854         got_bytes = rdata->got_bytes;
3855         for (i = 0; i < rdata->nr_pages; i++) {
3856                 struct page *page = rdata->pages[i];
3857
3858                 lru_cache_add_file(page);
3859
3860                 if (rdata->result == 0 ||
3861                     (rdata->result == -EAGAIN && got_bytes)) {
3862                         flush_dcache_page(page);
3863                         SetPageUptodate(page);
3864                 }
3865
3866                 unlock_page(page);
3867
3868                 if (rdata->result == 0 ||
3869                     (rdata->result == -EAGAIN && got_bytes))
3870                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3871
3872                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3873
3874                 put_page(page);
3875                 rdata->pages[i] = NULL;
3876         }
3877         kref_put(&rdata->refcount, cifs_readdata_release);
3878 }
3879
3880 static int
3881 readpages_fill_pages(struct TCP_Server_Info *server,
3882                      struct cifs_readdata *rdata, struct iov_iter *iter,
3883                      unsigned int len)
3884 {
3885         int result = 0;
3886         unsigned int i;
3887         u64 eof;
3888         pgoff_t eof_index;
3889         unsigned int nr_pages = rdata->nr_pages;
3890         unsigned int page_offset = rdata->page_offset;
3891
3892         /* determine the eof that the server (probably) has */
3893         eof = CIFS_I(rdata->mapping->host)->server_eof;
3894         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3895         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3896
3897         rdata->got_bytes = 0;
3898         rdata->tailsz = PAGE_SIZE;
3899         for (i = 0; i < nr_pages; i++) {
3900                 struct page *page = rdata->pages[i];
3901                 unsigned int to_read = rdata->pagesz;
3902                 size_t n;
3903
3904                 if (i == 0)
3905                         to_read -= page_offset;
3906                 else
3907                         page_offset = 0;
3908
3909                 n = to_read;
3910
3911                 if (len >= to_read) {
3912                         len -= to_read;
3913                 } else if (len > 0) {
3914                         /* enough for partial page, fill and zero the rest */
3915                         zero_user(page, len + page_offset, to_read - len);
3916                         n = rdata->tailsz = len;
3917                         len = 0;
3918                 } else if (page->index > eof_index) {
3919                         /*
3920                          * The VFS will not try to do readahead past the
3921                          * i_size, but it's possible that we have outstanding
3922                          * writes with gaps in the middle and the i_size hasn't
3923                          * caught up yet. Populate those with zeroed out pages
3924                          * to prevent the VFS from repeatedly attempting to
3925                          * fill them until the writes are flushed.
3926                          */
3927                         zero_user(page, 0, PAGE_SIZE);
3928                         lru_cache_add_file(page);
3929                         flush_dcache_page(page);
3930                         SetPageUptodate(page);
3931                         unlock_page(page);
3932                         put_page(page);
3933                         rdata->pages[i] = NULL;
3934                         rdata->nr_pages--;
3935                         continue;
3936                 } else {
3937                         /* no need to hold page hostage */
3938                         lru_cache_add_file(page);
3939                         unlock_page(page);
3940                         put_page(page);
3941                         rdata->pages[i] = NULL;
3942                         rdata->nr_pages--;
3943                         continue;
3944                 }
3945
3946                 if (iter)
3947                         result = copy_page_from_iter(
3948                                         page, page_offset, n, iter);
3949 #ifdef CONFIG_CIFS_SMB_DIRECT
3950                 else if (rdata->mr)
3951                         result = n;
3952 #endif
3953                 else
3954                         result = cifs_read_page_from_socket(
3955                                         server, page, page_offset, n);
3956                 if (result < 0)
3957                         break;
3958
3959                 rdata->got_bytes += result;
3960         }
3961
3962         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3963                                                 rdata->got_bytes : result;
3964 }
3965
3966 static int
3967 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3968                                struct cifs_readdata *rdata, unsigned int len)
3969 {
3970         return readpages_fill_pages(server, rdata, NULL, len);
3971 }
3972
3973 static int
3974 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3975                                struct cifs_readdata *rdata,
3976                                struct iov_iter *iter)
3977 {
3978         return readpages_fill_pages(server, rdata, iter, iter->count);
3979 }
3980
3981 static int
3982 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3983                     unsigned int rsize, struct list_head *tmplist,
3984                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3985 {
3986         struct page *page, *tpage;
3987         unsigned int expected_index;
3988         int rc;
3989         gfp_t gfp = readahead_gfp_mask(mapping);
3990
3991         INIT_LIST_HEAD(tmplist);
3992
3993         page = lru_to_page(page_list);
3994
3995         /*
3996          * Lock the page and put it in the cache. Since no one else
3997          * should have access to this page, we're safe to simply set
3998          * PG_locked without checking it first.
3999          */
4000         __SetPageLocked(page);
4001         rc = add_to_page_cache_locked(page, mapping,
4002                                       page->index, gfp);
4003
4004         /* give up if we can't stick it in the cache */
4005         if (rc) {
4006                 __ClearPageLocked(page);
4007                 return rc;
4008         }
4009
4010         /* move first page to the tmplist */
4011         *offset = (loff_t)page->index << PAGE_SHIFT;
4012         *bytes = PAGE_SIZE;
4013         *nr_pages = 1;
4014         list_move_tail(&page->lru, tmplist);
4015
4016         /* now try and add more pages onto the request */
4017         expected_index = page->index + 1;
4018         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4019                 /* discontinuity ? */
4020                 if (page->index != expected_index)
4021                         break;
4022
4023                 /* would this page push the read over the rsize? */
4024                 if (*bytes + PAGE_SIZE > rsize)
4025                         break;
4026
4027                 __SetPageLocked(page);
4028                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4029                         __ClearPageLocked(page);
4030                         break;
4031                 }
4032                 list_move_tail(&page->lru, tmplist);
4033                 (*bytes) += PAGE_SIZE;
4034                 expected_index++;
4035                 (*nr_pages)++;
4036         }
4037         return rc;
4038 }
4039
4040 static int cifs_readpages(struct file *file, struct address_space *mapping,
4041         struct list_head *page_list, unsigned num_pages)
4042 {
4043         int rc;
4044         struct list_head tmplist;
4045         struct cifsFileInfo *open_file = file->private_data;
4046         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4047         struct TCP_Server_Info *server;
4048         pid_t pid;
4049         unsigned int xid;
4050
4051         xid = get_xid();
4052         /*
4053          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4054          * immediately if the cookie is negative
4055          *
4056          * After this point, every page in the list might have PG_fscache set,
4057          * so we will need to clean that up off of every page we don't use.
4058          */
4059         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4060                                          &num_pages);
4061         if (rc == 0) {
4062                 free_xid(xid);
4063                 return rc;
4064         }
4065
4066         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067                 pid = open_file->pid;
4068         else
4069                 pid = current->tgid;
4070
4071         rc = 0;
4072         server = tlink_tcon(open_file->tlink)->ses->server;
4073
4074         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4075                  __func__, file, mapping, num_pages);
4076
4077         /*
4078          * Start with the page at end of list and move it to private
4079          * list. Do the same with any following pages until we hit
4080          * the rsize limit, hit an index discontinuity, or run out of
4081          * pages. Issue the async read and then start the loop again
4082          * until the list is empty.
4083          *
4084          * Note that list order is important. The page_list is in
4085          * the order of declining indexes. When we put the pages in
4086          * the rdata->pages, then we want them in increasing order.
4087          */
4088         while (!list_empty(page_list)) {
4089                 unsigned int i, nr_pages, bytes, rsize;
4090                 loff_t offset;
4091                 struct page *page, *tpage;
4092                 struct cifs_readdata *rdata;
4093                 unsigned credits;
4094
4095                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4096                                                    &rsize, &credits);
4097                 if (rc)
4098                         break;
4099
4100                 /*
4101                  * Give up immediately if rsize is too small to read an entire
4102                  * page. The VFS will fall back to readpage. We should never
4103                  * reach this point however since we set ra_pages to 0 when the
4104                  * rsize is smaller than a cache page.
4105                  */
4106                 if (unlikely(rsize < PAGE_SIZE)) {
4107                         add_credits_and_wake_if(server, credits, 0);
4108                         free_xid(xid);
4109                         return 0;
4110                 }
4111
4112                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4113                                          &nr_pages, &offset, &bytes);
4114                 if (rc) {
4115                         add_credits_and_wake_if(server, credits, 0);
4116                         break;
4117                 }
4118
4119                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4120                 if (!rdata) {
4121                         /* best to give up if we're out of mem */
4122                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4123                                 list_del(&page->lru);
4124                                 lru_cache_add_file(page);
4125                                 unlock_page(page);
4126                                 put_page(page);
4127                         }
4128                         rc = -ENOMEM;
4129                         add_credits_and_wake_if(server, credits, 0);
4130                         break;
4131                 }
4132
4133                 rdata->cfile = cifsFileInfo_get(open_file);
4134                 rdata->mapping = mapping;
4135                 rdata->offset = offset;
4136                 rdata->bytes = bytes;
4137                 rdata->pid = pid;
4138                 rdata->pagesz = PAGE_SIZE;
4139                 rdata->tailsz = PAGE_SIZE;
4140                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4141                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4142                 rdata->credits = credits;
4143
4144                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4145                         list_del(&page->lru);
4146                         rdata->pages[rdata->nr_pages++] = page;
4147                 }
4148
4149                 if (!rdata->cfile->invalidHandle ||
4150                     !(rc = cifs_reopen_file(rdata->cfile, true)))
4151                         rc = server->ops->async_readv(rdata);
4152                 if (rc) {
4153                         add_credits_and_wake_if(server, rdata->credits, 0);
4154                         for (i = 0; i < rdata->nr_pages; i++) {
4155                                 page = rdata->pages[i];
4156                                 lru_cache_add_file(page);
4157                                 unlock_page(page);
4158                                 put_page(page);
4159                         }
4160                         /* Fallback to the readpage in error/reconnect cases */
4161                         kref_put(&rdata->refcount, cifs_readdata_release);
4162                         break;
4163                 }
4164
4165                 kref_put(&rdata->refcount, cifs_readdata_release);
4166         }
4167
4168         /* Any pages that have been shown to fscache but didn't get added to
4169          * the pagecache must be uncached before they get returned to the
4170          * allocator.
4171          */
4172         cifs_fscache_readpages_cancel(mapping->host, page_list);
4173         free_xid(xid);
4174         return rc;
4175 }
4176
4177 /*
4178  * cifs_readpage_worker must be called with the page pinned
4179  */
4180 static int cifs_readpage_worker(struct file *file, struct page *page,
4181         loff_t *poffset)
4182 {
4183         char *read_data;
4184         int rc;
4185
4186         /* Is the page cached? */
4187         rc = cifs_readpage_from_fscache(file_inode(file), page);
4188         if (rc == 0)
4189                 goto read_complete;
4190
4191         read_data = kmap(page);
4192         /* for reads over a certain size could initiate async read ahead */
4193
4194         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4195
4196         if (rc < 0)
4197                 goto io_error;
4198         else
4199                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4200
4201         /* we do not want atime to be less than mtime, it broke some apps */
4202         file_inode(file)->i_atime = current_time(file_inode(file));
4203         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4204                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4205         else
4206                 file_inode(file)->i_atime = current_time(file_inode(file));
4207
4208         if (PAGE_SIZE > rc)
4209                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4210
4211         flush_dcache_page(page);
4212         SetPageUptodate(page);
4213
4214         /* send this page to the cache */
4215         cifs_readpage_to_fscache(file_inode(file), page);
4216
4217         rc = 0;
4218
4219 io_error:
4220         kunmap(page);
4221         unlock_page(page);
4222
4223 read_complete:
4224         return rc;
4225 }
4226
4227 static int cifs_readpage(struct file *file, struct page *page)
4228 {
4229         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4230         int rc = -EACCES;
4231         unsigned int xid;
4232
4233         xid = get_xid();
4234
4235         if (file->private_data == NULL) {
4236                 rc = -EBADF;
4237                 free_xid(xid);
4238                 return rc;
4239         }
4240
4241         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4242                  page, (int)offset, (int)offset);
4243
4244         rc = cifs_readpage_worker(file, page, &offset);
4245
4246         free_xid(xid);
4247         return rc;
4248 }
4249
4250 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4251 {
4252         struct cifsFileInfo *open_file;
4253         struct cifs_tcon *tcon =
4254                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4255
4256         spin_lock(&tcon->open_file_lock);
4257         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4258                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4259                         spin_unlock(&tcon->open_file_lock);
4260                         return 1;
4261                 }
4262         }
4263         spin_unlock(&tcon->open_file_lock);
4264         return 0;
4265 }
4266
4267 /* We do not want to update the file size from server for inodes
4268    open for write - to avoid races with writepage extending
4269    the file - in the future we could consider allowing
4270    refreshing the inode only on increases in the file size
4271    but this is tricky to do without racing with writebehind
4272    page caching in the current Linux kernel design */
4273 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4274 {
4275         if (!cifsInode)
4276                 return true;
4277
4278         if (is_inode_writable(cifsInode)) {
4279                 /* This inode is open for write at least once */
4280                 struct cifs_sb_info *cifs_sb;
4281
4282                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4283                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4284                         /* since no page cache to corrupt on directio
4285                         we can change size safely */
4286                         return true;
4287                 }
4288
4289                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4290                         return true;
4291
4292                 return false;
4293         } else
4294                 return true;
4295 }
4296
4297 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4298                         loff_t pos, unsigned len, unsigned flags,
4299                         struct page **pagep, void **fsdata)
4300 {
4301         int oncethru = 0;
4302         pgoff_t index = pos >> PAGE_SHIFT;
4303         loff_t offset = pos & (PAGE_SIZE - 1);
4304         loff_t page_start = pos & PAGE_MASK;
4305         loff_t i_size;
4306         struct page *page;
4307         int rc = 0;
4308
4309         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4310
4311 start:
4312         page = grab_cache_page_write_begin(mapping, index, flags);
4313         if (!page) {
4314                 rc = -ENOMEM;
4315                 goto out;
4316         }
4317
4318         if (PageUptodate(page))
4319                 goto out;
4320
4321         /*
4322          * If we write a full page it will be up to date, no need to read from
4323          * the server. If the write is short, we'll end up doing a sync write
4324          * instead.
4325          */
4326         if (len == PAGE_SIZE)
4327                 goto out;
4328
4329         /*
4330          * optimize away the read when we have an oplock, and we're not
4331          * expecting to use any of the data we'd be reading in. That
4332          * is, when the page lies beyond the EOF, or straddles the EOF
4333          * and the write will cover all of the existing data.
4334          */
4335         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4336                 i_size = i_size_read(mapping->host);
4337                 if (page_start >= i_size ||
4338                     (offset == 0 && (pos + len) >= i_size)) {
4339                         zero_user_segments(page, 0, offset,
4340                                            offset + len,
4341                                            PAGE_SIZE);
4342                         /*
4343                          * PageChecked means that the parts of the page
4344                          * to which we're not writing are considered up
4345                          * to date. Once the data is copied to the
4346                          * page, it can be set uptodate.
4347                          */
4348                         SetPageChecked(page);
4349                         goto out;
4350                 }
4351         }
4352
4353         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4354                 /*
4355                  * might as well read a page, it is fast enough. If we get
4356                  * an error, we don't need to return it. cifs_write_end will
4357                  * do a sync write instead since PG_uptodate isn't set.
4358                  */
4359                 cifs_readpage_worker(file, page, &page_start);
4360                 put_page(page);
4361                 oncethru = 1;
4362                 goto start;
4363         } else {
4364                 /* we could try using another file handle if there is one -
4365                    but how would we lock it to prevent close of that handle
4366                    racing with this read? In any case
4367                    this will be written out by write_end so is fine */
4368         }
4369 out:
4370         *pagep = page;
4371         return rc;
4372 }
4373
4374 static int cifs_release_page(struct page *page, gfp_t gfp)
4375 {
4376         if (PagePrivate(page))
4377                 return 0;
4378
4379         return cifs_fscache_release_page(page, gfp);
4380 }
4381
4382 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4383                                  unsigned int length)
4384 {
4385         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4386
4387         if (offset == 0 && length == PAGE_SIZE)
4388                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4389 }
4390
4391 static int cifs_launder_page(struct page *page)
4392 {
4393         int rc = 0;
4394         loff_t range_start = page_offset(page);
4395         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4396         struct writeback_control wbc = {
4397                 .sync_mode = WB_SYNC_ALL,
4398                 .nr_to_write = 0,
4399                 .range_start = range_start,
4400                 .range_end = range_end,
4401         };
4402
4403         cifs_dbg(FYI, "Launder page: %p\n", page);
4404
4405         if (clear_page_dirty_for_io(page))
4406                 rc = cifs_writepage_locked(page, &wbc);
4407
4408         cifs_fscache_invalidate_page(page, page->mapping->host);
4409         return rc;
4410 }
4411
4412 void cifs_oplock_break(struct work_struct *work)
4413 {
4414         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4415                                                   oplock_break);
4416         struct inode *inode = d_inode(cfile->dentry);
4417         struct cifsInodeInfo *cinode = CIFS_I(inode);
4418         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4419         struct TCP_Server_Info *server = tcon->ses->server;
4420         int rc = 0;
4421
4422         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4423                         TASK_UNINTERRUPTIBLE);
4424
4425         server->ops->downgrade_oplock(server, cinode,
4426                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4427
4428         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4429                                                 cifs_has_mand_locks(cinode)) {
4430                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4431                          inode);
4432                 cinode->oplock = 0;
4433         }
4434
4435         if (inode && S_ISREG(inode->i_mode)) {
4436                 if (CIFS_CACHE_READ(cinode))
4437                         break_lease(inode, O_RDONLY);
4438                 else
4439                         break_lease(inode, O_WRONLY);
4440                 rc = filemap_fdatawrite(inode->i_mapping);
4441                 if (!CIFS_CACHE_READ(cinode)) {
4442                         rc = filemap_fdatawait(inode->i_mapping);
4443                         mapping_set_error(inode->i_mapping, rc);
4444                         cifs_zap_mapping(inode);
4445                 }
4446                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4447         }
4448
4449         rc = cifs_push_locks(cfile);
4450         if (rc)
4451                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4452
4453         /*
4454          * releasing stale oplock after recent reconnect of smb session using
4455          * a now incorrect file handle is not a data integrity issue but do
4456          * not bother sending an oplock release if session to server still is
4457          * disconnected since oplock already released by the server
4458          */
4459         if (!cfile->oplock_break_cancelled) {
4460                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4461                                                              cinode);
4462                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4463         }
4464         cifs_done_oplock_break(cinode);
4465 }
4466
4467 /*
4468  * The presence of cifs_direct_io() in the address space ops vector
4469  * allowes open() O_DIRECT flags which would have failed otherwise.
4470  *
4471  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4472  * so this method should never be called.
4473  *
4474  * Direct IO is not yet supported in the cached mode. 
4475  */
4476 static ssize_t
4477 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4478 {
4479         /*
4480          * FIXME
4481          * Eventually need to support direct IO for non forcedirectio mounts
4482          */
4483         return -EINVAL;
4484 }
4485
4486
4487 const struct address_space_operations cifs_addr_ops = {
4488         .readpage = cifs_readpage,
4489         .readpages = cifs_readpages,
4490         .writepage = cifs_writepage,
4491         .writepages = cifs_writepages,
4492         .write_begin = cifs_write_begin,
4493         .write_end = cifs_write_end,
4494         .set_page_dirty = __set_page_dirty_nobuffers,
4495         .releasepage = cifs_release_page,
4496         .direct_IO = cifs_direct_io,
4497         .invalidatepage = cifs_invalidate_page,
4498         .launder_page = cifs_launder_page,
4499 };
4500
4501 /*
4502  * cifs_readpages requires the server to support a buffer large enough to
4503  * contain the header plus one complete page of data.  Otherwise, we need
4504  * to leave cifs_readpages out of the address space operations.
4505  */
4506 const struct address_space_operations cifs_addr_ops_smallbuf = {
4507         .readpage = cifs_readpage,
4508         .writepage = cifs_writepage,
4509         .writepages = cifs_writepages,
4510         .write_begin = cifs_write_begin,
4511         .write_end = cifs_write_end,
4512         .set_page_dirty = __set_page_dirty_nobuffers,
4513         .releasepage = cifs_release_page,
4514         .invalidatepage = cifs_invalidate_page,
4515         .launder_page = cifs_launder_page,
4516 };