Merge tag 'drm-misc-fixes-2019-02-07' of git://anongit.freedesktop.org/drm/drm-misc...
[sfrench/cifs-2.6.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/mm.h>
37 #include <asm/div64.h>
38 #include "cifsfs.h"
39 #include "cifspdu.h"
40 #include "cifsglob.h"
41 #include "cifsproto.h"
42 #include "cifs_unicode.h"
43 #include "cifs_debug.h"
44 #include "cifs_fs_sb.h"
45 #include "fscache.h"
46 #include "smbdirect.h"
47
48 static inline int cifs_convert_flags(unsigned int flags)
49 {
50         if ((flags & O_ACCMODE) == O_RDONLY)
51                 return GENERIC_READ;
52         else if ((flags & O_ACCMODE) == O_WRONLY)
53                 return GENERIC_WRITE;
54         else if ((flags & O_ACCMODE) == O_RDWR) {
55                 /* GENERIC_ALL is too much permission to request
56                    can cause unnecessary access denied on create */
57                 /* return GENERIC_ALL; */
58                 return (GENERIC_READ | GENERIC_WRITE);
59         }
60
61         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
62                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63                 FILE_READ_DATA);
64 }
65
66 static u32 cifs_posix_convert_flags(unsigned int flags)
67 {
68         u32 posix_flags = 0;
69
70         if ((flags & O_ACCMODE) == O_RDONLY)
71                 posix_flags = SMB_O_RDONLY;
72         else if ((flags & O_ACCMODE) == O_WRONLY)
73                 posix_flags = SMB_O_WRONLY;
74         else if ((flags & O_ACCMODE) == O_RDWR)
75                 posix_flags = SMB_O_RDWR;
76
77         if (flags & O_CREAT) {
78                 posix_flags |= SMB_O_CREAT;
79                 if (flags & O_EXCL)
80                         posix_flags |= SMB_O_EXCL;
81         } else if (flags & O_EXCL)
82                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
83                          current->comm, current->tgid);
84
85         if (flags & O_TRUNC)
86                 posix_flags |= SMB_O_TRUNC;
87         /* be safe and imply O_SYNC for O_DSYNC */
88         if (flags & O_DSYNC)
89                 posix_flags |= SMB_O_SYNC;
90         if (flags & O_DIRECTORY)
91                 posix_flags |= SMB_O_DIRECTORY;
92         if (flags & O_NOFOLLOW)
93                 posix_flags |= SMB_O_NOFOLLOW;
94         if (flags & O_DIRECT)
95                 posix_flags |= SMB_O_DIRECT;
96
97         return posix_flags;
98 }
99
100 static inline int cifs_get_disposition(unsigned int flags)
101 {
102         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103                 return FILE_CREATE;
104         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
105                 return FILE_OVERWRITE_IF;
106         else if ((flags & O_CREAT) == O_CREAT)
107                 return FILE_OPEN_IF;
108         else if ((flags & O_TRUNC) == O_TRUNC)
109                 return FILE_OVERWRITE;
110         else
111                 return FILE_OPEN;
112 }
113
/*
 * Open @full_path on the server using the SMB1 POSIX extensions
 * (CIFSPOSIXCreate) and optionally set up/refresh the matching inode from
 * the returned attributes.
 *
 * @pinode:  in/out; may be NULL when the caller does not want inode info.
 *           If *pinode is NULL a new inode is created from the returned
 *           attributes, otherwise the existing inode is refreshed.
 * @poplock: out; oplock state granted by the server.
 * @pnetfid: out; netfid of the opened file.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask before sending the mode to the server */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type == -1 means the server returned no file metadata */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open @full_path with the standard (non-POSIX) CIFS/SMB open via
 * server->ops->open, then refresh the inode metadata from the server.
 *
 * Returns 0 on success or a negative errno (-ENOSYS when the server ops
 * provide no open method).
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *
 *  O_SYNC is a reasonable match to CIFS writethrough flag
 *  and the read write flags match reasonably.  O_LARGEFILE
 *  is irrelevant because largefile support is always used
 *  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *  O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* scratch buffer receiving the file info returned by the open call */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh local inode attributes from what the server reported */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}
260
261 static bool
262 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 {
264         struct cifs_fid_locks *cur;
265         bool has_locks = false;
266
267         down_read(&cinode->lock_sem);
268         list_for_each_entry(cur, &cinode->llist, llist) {
269                 if (!list_empty(&cur->locks)) {
270                         has_locks = true;
271                         break;
272                 }
273         }
274         up_read(&cinode->lock_sem);
275         return has_locks;
276 }
277
/*
 * Allocate and initialize a cifsFileInfo for a freshly-opened handle,
 * link it onto the per-tcon and per-inode open-file lists, and install
 * the server fid/oplock state.  On success the new cifsFileInfo is also
 * stored in file->private_data; returns NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* attach this handle's (initially empty) lock list to the inode */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&tcon->open_file_lock);
	/* a lease break may have raced this open; prefer its oplock level */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* set_fid may set fid->purge_cache; checked after the unlock below */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
353
/*
 * Take a reference on @cifs_file under file_info_lock and return it.
 * Pair with cifsFileInfo_put().
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
362
363 /*
364  * Release a reference on the file private data. This may involve closing
365  * the filehandle out on the server. Must be called without holding
366  * tcon->open_file_lock and cifs_file->file_info_lock.
367  */
368 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
369 {
370         struct inode *inode = d_inode(cifs_file->dentry);
371         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
372         struct TCP_Server_Info *server = tcon->ses->server;
373         struct cifsInodeInfo *cifsi = CIFS_I(inode);
374         struct super_block *sb = inode->i_sb;
375         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
376         struct cifsLockInfo *li, *tmp;
377         struct cifs_fid fid;
378         struct cifs_pending_open open;
379         bool oplock_break_cancelled;
380
381         spin_lock(&tcon->open_file_lock);
382
383         spin_lock(&cifs_file->file_info_lock);
384         if (--cifs_file->count > 0) {
385                 spin_unlock(&cifs_file->file_info_lock);
386                 spin_unlock(&tcon->open_file_lock);
387                 return;
388         }
389         spin_unlock(&cifs_file->file_info_lock);
390
391         if (server->ops->get_lease_key)
392                 server->ops->get_lease_key(inode, &fid);
393
394         /* store open in pending opens to make sure we don't miss lease break */
395         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
396
397         /* remove it from the lists */
398         list_del(&cifs_file->flist);
399         list_del(&cifs_file->tlist);
400         atomic_dec(&tcon->num_local_opens);
401
402         if (list_empty(&cifsi->openFileList)) {
403                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
404                          d_inode(cifs_file->dentry));
405                 /*
406                  * In strict cache mode we need invalidate mapping on the last
407                  * close  because it may cause a error when we open this file
408                  * again and get at least level II oplock.
409                  */
410                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
411                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
412                 cifs_set_oplock_level(cifsi, 0);
413         }
414
415         spin_unlock(&tcon->open_file_lock);
416
417         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
418
419         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
420                 struct TCP_Server_Info *server = tcon->ses->server;
421                 unsigned int xid;
422
423                 xid = get_xid();
424                 if (server->ops->close)
425                         server->ops->close(xid, tcon, &cifs_file->fid);
426                 _free_xid(xid);
427         }
428
429         if (oplock_break_cancelled)
430                 cifs_done_oplock_break(cifsi);
431
432         cifs_del_pending_open(&open);
433
434         /*
435          * Delete any outstanding lock records. We'll lose them when the file
436          * is closed anyway.
437          */
438         down_write(&cifsi->lock_sem);
439         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
440                 list_del(&li->llist);
441                 cifs_del_lock_waiters(li);
442                 kfree(li);
443         }
444         list_del(&cifs_file->llist->llist);
445         kfree(cifs_file->llist);
446         up_write(&cifsi->lock_sem);
447
448         cifs_put_tlink(cifs_file->tlink);
449         dput(cifs_file->dentry);
450         cifs_sb_deactive(sb);
451         kfree(cifs_file);
452 }
453
/*
 * ->open() for regular files.  Tries the SMB1 POSIX open path when the
 * server advertises it, falls back to the standard NT open, then builds
 * the cifsFileInfo tracking structure for the new handle.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* switch to the direct-I/O file ops variants for O_DIRECT opens */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* try the POSIX-extensions open first when the server supports it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server claims the capability but can't do it */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register the open so a racing lease break is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open; we cannot track the handle locally */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
580
581 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
582
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* nested annotation: caller in the reopen path may hold lock_sem */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/* push POSIX locks when the unix extensions allow it, else mandatory */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
612
/*
 * Re-open a handle that was invalidated (e.g. by a session reconnect).
 * When @can_flush is true, dirty pages are written back and the inode
 * metadata is refreshed from the server after the reopen succeeds.
 *
 * Returns 0 on success (including the no-op case where the handle is
 * still valid) or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* another task already reopened it while we waited */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
771
772 int cifs_close(struct inode *inode, struct file *file)
773 {
774         if (file->private_data != NULL) {
775                 cifsFileInfo_put(file->private_data);
776                 file->private_data = NULL;
777         }
778
779         /* return code from the ->release op is always ignored */
780         return 0;
781 }
782
783 void
784 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
785 {
786         struct cifsFileInfo *open_file;
787         struct list_head *tmp;
788         struct list_head *tmp1;
789         struct list_head tmp_list;
790
791         if (!tcon->use_persistent || !tcon->need_reopen_files)
792                 return;
793
794         tcon->need_reopen_files = false;
795
796         cifs_dbg(FYI, "Reopen persistent handles");
797         INIT_LIST_HEAD(&tmp_list);
798
799         /* list all files open on tree connection, reopen resilient handles  */
800         spin_lock(&tcon->open_file_lock);
801         list_for_each(tmp, &tcon->openFileList) {
802                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
803                 if (!open_file->invalidHandle)
804                         continue;
805                 cifsFileInfo_get(open_file);
806                 list_add_tail(&open_file->rlist, &tmp_list);
807         }
808         spin_unlock(&tcon->open_file_lock);
809
810         list_for_each_safe(tmp, tmp1, &tmp_list) {
811                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
812                 if (cifs_reopen_file(open_file, false /* do not flush */))
813                         tcon->need_reopen_files = true;
814                 list_del_init(&open_file->rlist);
815                 cifsFileInfo_put(open_file);
816         }
817 }
818
/*
 * ->release handler for directories.  If the readdir was not completed,
 * sends a close for the search handle, then frees the cached network
 * buffer from the search state and drops the tlink and private data.
 * The VFS ignores the return code from ->release; rc is kept mainly for
 * the debug output.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		/* drop the spinlock before the network round trip below */
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* release the buffer holding the last search response, if any */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
869
870 static struct cifsLockInfo *
871 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
872 {
873         struct cifsLockInfo *lock =
874                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
875         if (!lock)
876                 return lock;
877         lock->offset = offset;
878         lock->length = length;
879         lock->type = type;
880         lock->pid = current->tgid;
881         lock->flags = flags;
882         INIT_LIST_HEAD(&lock->blist);
883         init_waitqueue_head(&lock->block_q);
884         return lock;
885 }
886
887 void
888 cifs_del_lock_waiters(struct cifsLockInfo *lock)
889 {
890         struct cifsLockInfo *li, *tmp;
891         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
892                 list_del_init(&li->blist);
893                 wake_up(&li->block_q);
894         }
895 }
896
/* Operation kinds used as @rw_check when scanning for lock conflicts */
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
/*
 * Scan one fid's cached lock list for a lock that conflicts with the
 * range [offset, offset+length) of the given @type.  On conflict,
 * *conf_lock (if non-NULL) is pointed at the offending lock and true
 * is returned; false means no conflict on this fid.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* non-overlapping ranges can never conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* shared locks coexist for same owner+fid or equal type */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/* OFD locks taken through the same fid do not conflict */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
937
938 bool
939 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
940                         __u8 type, __u16 flags,
941                         struct cifsLockInfo **conf_lock, int rw_check)
942 {
943         bool rc = false;
944         struct cifs_fid_locks *cur;
945         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
946
947         list_for_each_entry(cur, &cinode->llist, llist) {
948                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
949                                                  flags, cfile, conf_lock,
950                                                  rw_check);
951                 if (rc)
952                         break;
953         }
954
955         return rc;
956 }
957
/*
 * Check if there is another lock that prevents us to set the lock (mandatory
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		/* report the conflicting lock's range, owner and kind */
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		/* no local conflict but caching is off - ask the server */
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}
996
/*
 * Append an already-initialized lock to this fid's cached lock list.
 * Takes the inode's lock_sem for writing since the lock structures of
 * the inode are being modified.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
1005
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		/* no conflict and brlocks are cached - take it locally */
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/* queue behind the conflicting lock and sleep */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		/*
		 * Woken once unlinked from the blocked list (blist points
		 * back at itself); then retry the whole conflict check.
		 */
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted - remove ourselves from the blocked list */
		down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1053
/*
 * Check if there is another lock that prevents us to set the lock (posix
 * style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 * or leave it the same if we can't. Returns 0 if we don't need to request to
 * the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	/* posix_test_lock() may rewrite fl_type; remember what was asked */
	unsigned char saved_type = flock->fl_type;

	/* only FL_POSIX requests are handled here */
	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		/* no local conflict but we cannot cache - ask the server */
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}
1082
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	/* non-posix requests always go to the server */
	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		/* blocked on another lock - wait until unblocked, retry */
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
		if (!rc)
			goto try_again;
		/* interrupted - drop ourselves from the blocked list */
		locks_delete_block(flock);
	}
	return rc;
}
1115
/*
 * Push all locks cached on this fid to the server as LOCKING_ANDX
 * requests, packing as many ranges per request as the server's maxBuf
 * allows.  Two passes are made - one per lock type - since each request
 * carries a single type.  Returns 0 or the last non-zero rc from
 * cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	/* the two lock types pushed, one per outer-loop pass */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	/* cap the request at one page so kcalloc below stays small */
	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* buffer full - flush this batch */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			/* send the final, partially filled batch */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1192
/*
 * Hash a lock owner pointer into a 32-bit value mixed with the module's
 * cifs_lock_secret, so raw kernel pointers are not sent on the wire.
 */
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
1198
/* One cached posix lock queued for pushing to the server */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner, sent as the pid */
	__u16 netfid;		/* SMB file handle the lock applies to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1207
/*
 * Push all cached posix (FL_POSIX) locks on this file's inode to the
 * server.  Counts the locks first, preallocates one lock_to_push per
 * lock (allocation cannot happen under flc_lock), snapshots the lock
 * list into that preallocated list, then issues one CIFSSMBPosixLock
 * call per lock.  Returns 0 or the last non-zero rc.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* no lock context means nothing to push */
	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	/* walk both lists in lockstep, filling one lck per posix lock */
	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	/* now that the spinlock is dropped, do the network calls */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part-way - free what was allocated so far */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1295
/*
 * Push all brlocks cached on this file to the server - posix style if
 * the mount supports unix fcntl capabilities (and posix brlocks are not
 * disabled), mandatory style otherwise - then turn off brlock caching
 * for the inode.
 */
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* we are going to update can_cache_brlcks here - need a write access */
	down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* nothing cached (or already pushed) - nothing to do */
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
1322
1323 static void
1324 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1325                 bool *wait_flag, struct TCP_Server_Info *server)
1326 {
1327         if (flock->fl_flags & FL_POSIX)
1328                 cifs_dbg(FYI, "Posix\n");
1329         if (flock->fl_flags & FL_FLOCK)
1330                 cifs_dbg(FYI, "Flock\n");
1331         if (flock->fl_flags & FL_SLEEP) {
1332                 cifs_dbg(FYI, "Blocking lock\n");
1333                 *wait_flag = true;
1334         }
1335         if (flock->fl_flags & FL_ACCESS)
1336                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1337         if (flock->fl_flags & FL_LEASE)
1338                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1339         if (flock->fl_flags &
1340             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1341                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1342                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1343
1344         *type = server->vals->large_lock_type;
1345         if (flock->fl_type == F_WRLCK) {
1346                 cifs_dbg(FYI, "F_WRLCK\n");
1347                 *type |= server->vals->exclusive_lock_type;
1348                 *lock = 1;
1349         } else if (flock->fl_type == F_UNLCK) {
1350                 cifs_dbg(FYI, "F_UNLCK\n");
1351                 *type |= server->vals->unlock_lock_type;
1352                 *unlock = 1;
1353                 /* Check if unlock includes more than one lock range */
1354         } else if (flock->fl_type == F_RDLCK) {
1355                 cifs_dbg(FYI, "F_RDLCK\n");
1356                 *type |= server->vals->shared_lock_type;
1357                 *lock = 1;
1358         } else if (flock->fl_type == F_EXLCK) {
1359                 cifs_dbg(FYI, "F_EXLCK\n");
1360                 *type |= server->vals->exclusive_lock_type;
1361                 *lock = 1;
1362         } else if (flock->fl_type == F_SHLCK) {
1363                 cifs_dbg(FYI, "F_SHLCK\n");
1364                 *type |= server->vals->shared_lock_type;
1365                 *lock = 1;
1366         } else
1367                 cifs_dbg(FYI, "Unknown type of lock\n");
1368 }
1369
/*
 * Handle an F_GETLK-style query: test whether @flock could be granted.
 * On posix-capable mounts, test locally then via CIFSSMBPosixLock.
 * Otherwise probe the server by trying to take (and then release) a
 * mandatory lock of the requested type, falling back to a shared-type
 * probe to distinguish a read-conflict from a write-conflict.  Updates
 * flock->fl_type with the outcome.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* probe succeeded - range is free; release it again */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		/* a shared probe failed - an exclusive lock must conflict */
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed; retry shared to classify the conflict */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1438
1439 void
1440 cifs_move_llist(struct list_head *source, struct list_head *dest)
1441 {
1442         struct list_head *li, *tmp;
1443         list_for_each_safe(li, tmp, source)
1444                 list_move(li, dest);
1445 }
1446
/*
 * Free every cifsLockInfo on @llist, first waking any requests still
 * blocked on each lock so no sleeper is left referencing freed memory.
 */
void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}
1457
/*
 * Remove all cached mandatory locks fully contained in @flock's range.
 * If brlocks are cached, locks are simply dropped locally; otherwise
 * unlock requests are batched into LOCKING_ANDX calls.  Removed locks
 * are parked on tmp_llist until the server confirms, so they can be put
 * back if the unlock request fails.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	/* the two lock types handled, one per outer-loop pass */
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	/* cap the request at one page so kcalloc below stays small */
	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			/* only locks fully inside the range are removed */
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			/* flush the final, partially filled batch */
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
1570
/*
 * Set or clear a byte-range lock described by @flock on the file.
 * Uses POSIX (unix extensions) locking when @posix_lck is true,
 * otherwise mandatory (Windows-style) brlocks via server->ops.
 * @xid is allocated and freed by the caller.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	/* fl_end is inclusive, hence the +1 */
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		/*
		 * Try the local POSIX lock set first; rc == 0 (handled
		 * locally) or rc < 0 (hard error) both end here.  Only a
		 * positive rc falls through to ask the server.
		 */
		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/* rc > 0 means a conflicting lock already covers the range */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* ownership of @lock passes to the cfile's lock list */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX && !rc)
		rc = locks_lock_file_wait(file, flock);
	return rc;
}
1652
1653 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1654 {
1655         int rc, xid;
1656         int lock = 0, unlock = 0;
1657         bool wait_flag = false;
1658         bool posix_lck = false;
1659         struct cifs_sb_info *cifs_sb;
1660         struct cifs_tcon *tcon;
1661         struct cifsInodeInfo *cinode;
1662         struct cifsFileInfo *cfile;
1663         __u16 netfid;
1664         __u32 type;
1665
1666         rc = -EACCES;
1667         xid = get_xid();
1668
1669         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1670                  cmd, flock->fl_flags, flock->fl_type,
1671                  flock->fl_start, flock->fl_end);
1672
1673         cfile = (struct cifsFileInfo *)file->private_data;
1674         tcon = tlink_tcon(cfile->tlink);
1675
1676         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1677                         tcon->ses->server);
1678         cifs_sb = CIFS_FILE_SB(file);
1679         netfid = cfile->fid.netfid;
1680         cinode = CIFS_I(file_inode(file));
1681
1682         if (cap_unix(tcon->ses) &&
1683             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1684             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1685                 posix_lck = true;
1686         /*
1687          * BB add code here to normalize offset and length to account for
1688          * negative length which we can not accept over the wire.
1689          */
1690         if (IS_GETLK(cmd)) {
1691                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1692                 free_xid(xid);
1693                 return rc;
1694         }
1695
1696         if (!lock && !unlock) {
1697                 /*
1698                  * if no lock or unlock then nothing to do since we do not
1699                  * know what it is
1700                  */
1701                 free_xid(xid);
1702                 return -EOPNOTSUPP;
1703         }
1704
1705         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1706                         xid);
1707         free_xid(xid);
1708         return rc;
1709 }
1710
1711 /*
1712  * update the file size (if needed) after a write. Should be called with
1713  * the inode->i_lock held
1714  */
1715 void
1716 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1717                       unsigned int bytes_written)
1718 {
1719         loff_t end_of_write = offset + bytes_written;
1720
1721         if (end_of_write > cifsi->server_eof)
1722                 cifsi->server_eof = end_of_write;
1723 }
1724
1725 static ssize_t
1726 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1727            size_t write_size, loff_t *offset)
1728 {
1729         int rc = 0;
1730         unsigned int bytes_written = 0;
1731         unsigned int total_written;
1732         struct cifs_sb_info *cifs_sb;
1733         struct cifs_tcon *tcon;
1734         struct TCP_Server_Info *server;
1735         unsigned int xid;
1736         struct dentry *dentry = open_file->dentry;
1737         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1738         struct cifs_io_parms io_parms;
1739
1740         cifs_sb = CIFS_SB(dentry->d_sb);
1741
1742         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1743                  write_size, *offset, dentry);
1744
1745         tcon = tlink_tcon(open_file->tlink);
1746         server = tcon->ses->server;
1747
1748         if (!server->ops->sync_write)
1749                 return -ENOSYS;
1750
1751         xid = get_xid();
1752
1753         for (total_written = 0; write_size > total_written;
1754              total_written += bytes_written) {
1755                 rc = -EAGAIN;
1756                 while (rc == -EAGAIN) {
1757                         struct kvec iov[2];
1758                         unsigned int len;
1759
1760                         if (open_file->invalidHandle) {
1761                                 /* we could deadlock if we called
1762                                    filemap_fdatawait from here so tell
1763                                    reopen_file not to flush data to
1764                                    server now */
1765                                 rc = cifs_reopen_file(open_file, false);
1766                                 if (rc != 0)
1767                                         break;
1768                         }
1769
1770                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1771                                   (unsigned int)write_size - total_written);
1772                         /* iov[0] is reserved for smb header */
1773                         iov[1].iov_base = (char *)write_data + total_written;
1774                         iov[1].iov_len = len;
1775                         io_parms.pid = pid;
1776                         io_parms.tcon = tcon;
1777                         io_parms.offset = *offset;
1778                         io_parms.length = len;
1779                         rc = server->ops->sync_write(xid, &open_file->fid,
1780                                         &io_parms, &bytes_written, iov, 1);
1781                 }
1782                 if (rc || (bytes_written == 0)) {
1783                         if (total_written)
1784                                 break;
1785                         else {
1786                                 free_xid(xid);
1787                                 return rc;
1788                         }
1789                 } else {
1790                         spin_lock(&d_inode(dentry)->i_lock);
1791                         cifs_update_eof(cifsi, *offset, bytes_written);
1792                         spin_unlock(&d_inode(dentry)->i_lock);
1793                         *offset += bytes_written;
1794                 }
1795         }
1796
1797         cifs_stats_bytes_written(tcon, total_written);
1798
1799         if (total_written > 0) {
1800                 spin_lock(&d_inode(dentry)->i_lock);
1801                 if (*offset > d_inode(dentry)->i_size)
1802                         i_size_write(d_inode(dentry), *offset);
1803                 spin_unlock(&d_inode(dentry)->i_lock);
1804         }
1805         mark_inode_dirty_sync(d_inode(dentry));
1806         free_xid(xid);
1807         return total_written;
1808 }
1809
/*
 * Find an open handle on @cifs_inode usable for reading and take a
 * reference on it (caller must cifsFileInfo_put() it).  On multiuser
 * mounts, @fsuid_only restricts the search to handles opened by the
 * current fsuid.  Returns NULL if no valid readable handle exists.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	/* open_file_lock protects the inode's open-file list */
	spin_lock(&tcon->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&tcon->open_file_lock);
	return NULL;
}
1844
/*
 * Find an open handle on @cifs_inode usable for writing and take a
 * reference on it (caller must cifsFileInfo_put() it).  Prefers a
 * valid handle owned by the current thread group; falls back to any
 * available handle, and finally tries to reopen an invalidated one,
 * retrying the whole scan up to MAX_REOPEN_ATT times.  Returns NULL
 * if no writable handle can be found or revived.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	the VFS or MM) should not happen but we had reports of on oops (due to
	it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
refind_writable:
	/* give up after too many failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&tcon->open_file_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles owned by this tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} else {
				/* remember one invalid handle to try reopening */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		/* pin it before dropping the lock for the reopen attempt */
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&tcon->open_file_lock);

	if (inv_file) {
		/* try to revive the invalidated handle outside the spinlock */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			/* reopen failed: demote this handle and rescan */
			spin_lock(&tcon->open_file_lock);
			list_move_tail(&inv_file->flist,
					&cifs_inode->openFileList);
			spin_unlock(&tcon->open_file_lock);
			cifsFileInfo_put(inv_file);
			++refind;
			inv_file = NULL;
			spin_lock(&tcon->open_file_lock);
			goto refind_writable;
		}
	}

	return NULL;
}
1927
/*
 * Write the byte range [@from, @to) of @page back to the server using
 * any writable handle on the inode.  Returns 0 on success, a negative
 * errno on failure, and silently succeeds (returns 0) if the page lies
 * beyond i_size (racing truncate).
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	/* map the page and point at the start of the range to write */
	write_data = kmap(page);
	write_data += from;

	/* reject an out-of-range or inverted byte range */
	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
1981
1982 static struct cifs_writedata *
1983 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1984                           pgoff_t end, pgoff_t *index,
1985                           unsigned int *found_pages)
1986 {
1987         struct cifs_writedata *wdata;
1988
1989         wdata = cifs_writedata_alloc((unsigned int)tofind,
1990                                      cifs_writev_complete);
1991         if (!wdata)
1992                 return NULL;
1993
1994         *found_pages = find_get_pages_range_tag(mapping, index, end,
1995                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1996         return wdata;
1997 }
1998
/*
 * From the @found_pages candidates in @wdata, lock and claim a maximal
 * run of consecutive dirty pages for a single write request: each kept
 * page is locked, cleared dirty and marked under writeback.  Stops at
 * the first page that is non-consecutive, already busy, beyond @end,
 * or past EOF.  Pages not kept are released.  Returns the number of
 * pages kept; *@next, *@index and *@done are updated for the caller's
 * scan loop.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block on the first page, but never mid-run (would stall
		   pages already marked for writeback) */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2077
/*
 * Finish filling in @wdata (offset/length bookkeeping plus a writable
 * file handle) and submit the pages via the server's async write op.
 * All @nr_pages pages are unlocked before returning, whether or not
 * the submission succeeded.  Returns 0 or a negative errno.
 */
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc = 0;
	struct TCP_Server_Info *server;
	unsigned int i;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_SIZE;
	/* the last page may extend past EOF; only send up to i_size */
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;

	/* drop any stale handle left over from a previous attempt */
	if (wdata->cfile != NULL)
		cifsFileInfo_put(wdata->cfile);
	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
	if (!wdata->cfile) {
		cifs_dbg(VFS, "No writable handles for inode\n");
		rc = -EBADF;
	} else {
		wdata->pid = wdata->cfile->pid;
		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
		rc = server->ops->async_writev(wdata, cifs_writedata_release);
	}

	for (i = 0; i < nr_pages; ++i)
		unlock_page(wdata->pages[i]);

	return rc;
}
2112
/*
 * ->writepages for cifs: gather runs of consecutive dirty pages and
 * send them as large async writes, throttled by the server's MTU
 * credits.  Falls back to generic_writepages() (one page at a time)
 * when wsize is smaller than a page.  Implements the usual
 * range_cyclic wrap-around scan and retries -EAGAIN sends when the
 * caller asked for WB_SYNC_ALL.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	int rc = 0;
	int saved_rc = 0;
	unsigned int xid;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	xid = get_xid();
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize, credits;
		pgoff_t next = 0, tofind, saved_index = index;

		/* reserve send credits sized to this request */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc != 0) {
			done = true;
			break;
		}

		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			done = true;
			/* return the unused credits to the server pool */
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits;

		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				/* retryable errors redirty; fatal ones mark error */
				if (is_retryable_error(rc))
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (!is_retryable_error(rc))
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			/* data-integrity sync must not skip this run: rescan it */
			index = saved_index;
			continue;
		}

		/* Return immediately if we received a signal during writing */
		if (is_interrupt_error(rc)) {
			done = true;
			break;
		}

		if (rc != 0 && saved_rc == 0)
			saved_rc = rc;

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (saved_rc != 0)
		rc = saved_rc;

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	free_xid(xid);
	return rc;
}
2244
/*
 * Write a single locked page back to the server.  Takes an extra page
 * reference for the duration, marks the page under writeback, and on a
 * retryable error either loops (WB_SYNC_ALL + -EAGAIN) or redirties
 * the page for a later pass.  The page is left locked for the caller.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		/* sync writeback must not drop the page on -EAGAIN */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2285
/*
 * ->writepage entry point: the locked helper does the real work and
 * leaves the page locked, so drop the page lock here as the VFS
 * writepage contract requires.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}
2292
/*
 * ->write_end: complete a buffered write of @copied bytes at @pos.
 * If the page is fully up to date it is simply marked dirty for later
 * writeback; otherwise the new bytes are written straight through to
 * the server via cifs_write().  Updates i_size as needed and releases
 * the page lock/ref taken at write_begin.  Returns bytes accepted or
 * a negative errno.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* forward the opener's pid when the mount requests it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/* PageChecked was set by write_begin when it skipped reading the
	   page; a full copy now makes it up to date */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		/* grow i_size under i_lock if the write extended the file */
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2353
/*
 * Strict-cache fsync: flush dirty pagecache for the range, drop the local
 * pagecache if we no longer hold a read-caching oplock/lease, then ask the
 * server to flush its copy of the file (unless suppressed by mount option).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	/* push dirty pages to the server and wait for the writes to finish */
	rc = file_write_and_wait_range(file, start, end);
	if (rc)
		return rc;
	inode_lock(inode);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	/*
	 * Without a read-caching oplock/lease the pagecache may be stale
	 * relative to the server; zap it so subsequent reads refetch.
	 */
	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	/* NOSSYNC presumably maps to the "nostrictsync" mount option — confirm */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	inode_unlock(inode);
	return rc;
}
2396
/*
 * Non-strict fsync: flush dirty pagecache for the range and send a server
 * flush. Unlike cifs_strict_fsync() this never invalidates the pagecache.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct inode *inode = file->f_mapping->host;

	/* write out and wait on any dirty pages in the requested range */
	rc = file_write_and_wait_range(file, start, end);
	if (rc)
		return rc;
	inode_lock(inode);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	/* skip the server-side flush when the mount disables strict sync */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	inode_unlock(inode);
	return rc;
}
2430
2431 /*
2432  * As file closes, flush all cached write data for this inode checking
2433  * for write behind errors.
2434  */
2435 int cifs_flush(struct file *file, fl_owner_t id)
2436 {
2437         struct inode *inode = file_inode(file);
2438         int rc = 0;
2439
2440         if (file->f_mode & FMODE_WRITE)
2441                 rc = filemap_write_and_wait(inode->i_mapping);
2442
2443         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2444
2445         return rc;
2446 }
2447
2448 static int
2449 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2450 {
2451         int rc = 0;
2452         unsigned long i;
2453
2454         for (i = 0; i < num_pages; i++) {
2455                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2456                 if (!pages[i]) {
2457                         /*
2458                          * save number of pages we have already allocated and
2459                          * return with ENOMEM error
2460                          */
2461                         num_pages = i;
2462                         rc = -ENOMEM;
2463                         break;
2464                 }
2465         }
2466
2467         if (rc) {
2468                 for (i = 0; i < num_pages; i++)
2469                         put_page(pages[i]);
2470         }
2471         return rc;
2472 }
2473
2474 static inline
2475 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2476 {
2477         size_t num_pages;
2478         size_t clen;
2479
2480         clen = min_t(const size_t, len, wsize);
2481         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2482
2483         if (cur_len)
2484                 *cur_len = clen;
2485
2486         return num_pages;
2487 }
2488
/*
 * Final-put destructor for an uncached wdata: drop our reference on the
 * owning aio context, release every page the wdata holds, then free the
 * wdata itself via the generic release.
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	cifs_writedata_release(refcount);
}
2501
2502 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2503
/*
 * Work-queue completion for an uncached write: advance the server EOF and
 * the inode size if the write extended the file, signal the waiter, then
 * try to collect all finished writes for the aio context.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2523
/*
 * Copy up to *len bytes from the iterator into the wdata's pages.
 * On return *len holds the number of bytes actually copied and
 * *num_pages the number of pages used; returns -EFAULT if nothing
 * could be copied at all.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 */
	*num_pages = i + 1;
	return 0;
}
2565
/*
 * Resend a previously failed wdata as a single request: wait until we have
 * enough send credits for the whole payload, reopen the handle if needed,
 * and reissue the async write. On success the wdata is queued on
 * wdata_list; on failure our reference on the wdata is dropped.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize, credits;
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(wdata->cfile->tlink)->ses->server;

	/*
	 * Wait for credits to resend this wdata.
	 * Note: we are attempting to resend the whole wdata not in segments
	 */
	do {
		rc = server->ops->wait_mtu_credits(
			server, wdata->bytes, &wsize, &credits);

		if (rc)
			goto out;

		/* not enough yet: give the credits back and retry later */
		if (wsize < wdata->bytes) {
			add_credits_and_wake_if(server, credits, 0);
			msleep(1000);
		}
	} while (wsize < wdata->bytes);

	rc = -EAGAIN;
	while (rc == -EAGAIN) {
		rc = 0;
		/* stale handle after reconnect must be reopened first */
		if (wdata->cfile->invalidHandle)
			rc = cifs_reopen_file(wdata->cfile, false);
		if (!rc)
			rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
	}

	if (!rc) {
		list_add_tail(&wdata->list, wdata_list);
		return 0;
	}

	/* send failed: return the credits we were holding */
	add_credits_and_wake_if(server, wdata->credits, 0);
out:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);

	return rc;
}
2613
/*
 * Split an uncached/direct write into credit-sized chunks and issue each
 * as an async write. For direct I/O the user pages are pinned in place;
 * otherwise data is copied into freshly allocated pages. Each issued
 * wdata is appended to wdata_list for later collection. On -EAGAIN the
 * iterator is rewound to the saved position and the chunk is retried.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;

	/* forward the opener's pid when the mount requests it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;

	do {
		unsigned int wsize, credits;

		/* block until we hold credits for up to wsize bytes */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, wsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the caller's pages instead of copying them */
			result = iov_iter_get_pages_alloc(
				from, &pagevec, cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"direct_writev couldn't get user pages "
					"(rc=%zd) iter type %d iov_offset %zd "
					"count %zd\n",
					result, from->type,
					from->iov_offset, from->count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			cur_len = (size_t)result;
			iov_iter_advance(from, cur_len);

			/* start may be mid-page, so round the span up */
			nr_pages =
				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

			wdata = cifs_writedata_direct_alloc(pagevec,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}


			wdata->page_offset = start;
			/* bytes carried by the final (possibly partial) page */
			wdata->tailsz =
				nr_pages > 1 ?
					cur_len - (PAGE_SIZE - start) -
					(nr_pages - 2) * PAGE_SIZE :
					cur_len;
		} else {
			nr_pages = get_numpages(wsize, len, &cur_len);
			wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
			if (!wdata) {
				rc = -ENOMEM;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
			if (rc) {
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			num_pages = nr_pages;
			rc = wdata_fill_from_iovec(
				wdata, from, &cur_len, &num_pages);
			if (rc) {
				for (i = 0; i < nr_pages; i++)
					put_page(wdata->pages[i]);
				kvfree(wdata->pages);
				kfree(wdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			/*
			 * Bring nr_pages down to the number of pages we
			 * actually used, and free any pages that we didn't use.
			 */
			for ( ; nr_pages > num_pages; nr_pages--)
				put_page(wdata->pages[nr_pages - 1]);

			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->credits = credits;
		wdata->ctx = ctx;
		kref_get(&ctx->refcount);

		/* reopen a stale handle before sending, then issue the write */
		if (!wdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(wdata->cfile, false)))
			rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		if (rc) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator and retry this chunk */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
2761
/*
 * Gather the results of all outstanding uncached writes for an aio
 * context. Called both from the issuing path and from each write's
 * completion work; returns early (keeping the ctx alive) if any write is
 * still in flight. Retryable failures are resent in place. Once every
 * wdata is accounted for, ctx->rc is finalized and the caller is
 * notified via ki_complete (async) or the ctx completion (sync).
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	unsigned int i;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an empty list means another collector already finished the job */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* still in flight: a later completion will re-collect */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);
				}

				list_splice(&tmp_list, &ctx->list);

				kref_put(&wdata->refcount,
					 cifs_uncached_writedata_release);
				/* list changed under us; start over */
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	/* non-direct I/O pinned the caller's pages in ctx->bv; release them */
	if (!ctx->direct_io)
		for (i = 0; i < ctx->npages; i++)
			put_page(ctx->bv[i].bv_page);

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
2848
/*
 * Common implementation of uncached and direct writev. Sets up an aio
 * context, fans the write out via cifs_write_from_iter(), then either
 * returns -EIOCBQUEUED for async iocbs or waits for all chunks to
 * complete and returns the number of bytes written.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	struct iov_iter saved_from = *from;
	size_t len = iov_iter_count(from);
	int rc;

	/*
	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
	 * In this case, fall back to non-direct write function.
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && from->type & ITER_KVEC) {
		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
		direct = false;
	}

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* only async iocbs keep a pointer back for ki_complete */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	if (direct) {
		ctx->direct_io = true;
		ctx->iter = *from;
		ctx->len = len;
	} else {
		/* copy/pin the user iovec into ctx for the async path */
		rc = setup_aio_ctx_iter(ctx, from, WRITE);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async: completions will finish the job and call ki_complete */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report EINTR with what completed */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
2952
/* O_DIRECT write entry point: user pages are pinned, not copied. */
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, true);
}
2957
/* Uncached write entry point: data is copied into kernel pages first. */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}
2962
/*
 * Cached (pagecache) write used when we hold a write-caching oplock.
 * Takes lock_sem to ensure no mandatory byte-range lock forbids the
 * write before going through the generic pagecache write path.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	/* refuse the write if it overlaps someone else's exclusive lock */
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* honor O_SYNC/O_DSYNC semantics after a successful write */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
2998
/*
 * Strict-cache write entry point. Chooses the write strategy based on
 * caching state: generic pagecache write when POSIX byte-range locking
 * applies, locked pagecache write while holding a write oplock, or an
 * uncached write straight to the server otherwise.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* non-zero means an oplock break is in progress; bail out */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/* POSIX (unix extensions) locking: generic path is safe */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (written > 0 && CIFS_CACHE_READ(cinode)) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * an old data.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3046
3047 static struct cifs_readdata *
3048 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3049 {
3050         struct cifs_readdata *rdata;
3051
3052         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3053         if (rdata != NULL) {
3054                 rdata->pages = pages;
3055                 kref_init(&rdata->refcount);
3056                 INIT_LIST_HEAD(&rdata->list);
3057                 init_completion(&rdata->done);
3058                 INIT_WORK(&rdata->work, complete);
3059         }
3060
3061         return rdata;
3062 }
3063
3064 static struct cifs_readdata *
3065 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3066 {
3067         struct page **pages =
3068                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3069         struct cifs_readdata *ret = NULL;
3070
3071         if (pages) {
3072                 ret = cifs_readdata_direct_alloc(pages, complete);
3073                 if (!ret)
3074                         kfree(pages);
3075         }
3076
3077         return ret;
3078 }
3079
/*
 * Final-put destructor for a readdata: tear down the SMB-Direct memory
 * registration if present, drop our reference on the open file, and free
 * the page-pointer array and the readdata itself. Note: the pages
 * themselves are not put here; callers release them separately.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}
3097
3098 static int
3099 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3100 {
3101         int rc = 0;
3102         struct page *page;
3103         unsigned int i;
3104
3105         for (i = 0; i < nr_pages; i++) {
3106                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3107                 if (!page) {
3108                         rc = -ENOMEM;
3109                         break;
3110                 }
3111                 rdata->pages[i] = page;
3112         }
3113
3114         if (rc) {
3115                 for (i = 0; i < nr_pages; i++) {
3116                         put_page(rdata->pages[i]);
3117                         rdata->pages[i] = NULL;
3118                 }
3119         }
3120         return rc;
3121 }
3122
/*
 * Final-put destructor for an uncached readdata: drop our reference on
 * the owning aio context, put every page, then free the readdata via the
 * generic release.
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}
3136
3137 /**
3138  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3139  * @rdata:      the readdata response with list of pages holding data
3140  * @iter:       destination for our data
3141  *
3142  * This function copies data from a list of pages in a readdata response into
3143  * an array of iovecs. It will first calculate where the data should go
3144  * based on the info in the readdata and then copy the data into that spot.
3145  */
3146 static int
3147 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3148 {
3149         size_t remaining = rdata->got_bytes;
3150         unsigned int i;
3151
3152         for (i = 0; i < rdata->nr_pages; i++) {
3153                 struct page *page = rdata->pages[i];
3154                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3155                 size_t written;
3156
3157                 if (unlikely(iov_iter_is_pipe(iter))) {
3158                         void *addr = kmap_atomic(page);
3159
3160                         written = copy_to_iter(addr, copy, iter);
3161                         kunmap_atomic(addr);
3162                 } else
3163                         written = copy_page_to_iter(page, 0, copy, iter);
3164                 remaining -= written;
3165                 if (written < copy && iov_iter_count(iter) > 0)
3166                         break;
3167         }
3168         return remaining ? -EFAULT : 0;
3169 }
3170
3171 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3172
/*
 * Work-queue completion for an uncached read: signal the waiter, then try
 * to collect all finished reads for the aio context.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3184
3185 static int
3186 uncached_fill_pages(struct TCP_Server_Info *server,
3187                     struct cifs_readdata *rdata, struct iov_iter *iter,
3188                     unsigned int len)
3189 {
3190         int result = 0;
3191         unsigned int i;
3192         unsigned int nr_pages = rdata->nr_pages;
3193         unsigned int page_offset = rdata->page_offset;
3194
3195         rdata->got_bytes = 0;
3196         rdata->tailsz = PAGE_SIZE;
3197         for (i = 0; i < nr_pages; i++) {
3198                 struct page *page = rdata->pages[i];
3199                 size_t n;
3200                 unsigned int segment_size = rdata->pagesz;
3201
3202                 if (i == 0)
3203                         segment_size -= page_offset;
3204                 else
3205                         page_offset = 0;
3206
3207
3208                 if (len <= 0) {
3209                         /* no need to hold page hostage */
3210                         rdata->pages[i] = NULL;
3211                         rdata->nr_pages--;
3212                         put_page(page);
3213                         continue;
3214                 }
3215
3216                 n = len;
3217                 if (len >= segment_size)
3218                         /* enough data to fill the page */
3219                         n = segment_size;
3220                 else
3221                         rdata->tailsz = len;
3222                 len -= n;
3223
3224                 if (iter)
3225                         result = copy_page_from_iter(
3226                                         page, page_offset, n, iter);
3227 #ifdef CONFIG_CIFS_SMB_DIRECT
3228                 else if (rdata->mr)
3229                         result = n;
3230 #endif
3231                 else
3232                         result = cifs_read_page_from_socket(
3233                                         server, page, page_offset, n);
3234                 if (result < 0)
3235                         break;
3236
3237                 rdata->got_bytes += result;
3238         }
3239
3240         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3241                                                 rdata->got_bytes : result;
3242 }
3243
/* ->read_into_pages hook: receive the payload from the socket into pages */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata, unsigned int len)
{
	return uncached_fill_pages(server, rdata, NULL, len);
}
3250
/* ->copy_into_pages hook: copy an already-received payload into pages */
static int
cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
			      struct cifs_readdata *rdata,
			      struct iov_iter *iter)
{
	return uncached_fill_pages(server, rdata, iter, iter->count);
}
3258
/*
 * Resend a whole rdata after a retryable failure (direct I/O path).
 * Waits until enough credits are available to cover rdata->bytes in a
 * single request, then reissues the async read. On success the rdata is
 * queued on @rdata_list; on failure its reference is dropped (which may
 * also release the last ref to the aio ctx).
 *
 * NOTE(review): @ctx is currently unused here -- confirm it is kept only
 * for signature symmetry with cifs_send_async_read().
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize, credits;
	int rc;
	struct TCP_Server_Info *server =
		tlink_tcon(rdata->cfile->tlink)->ses->server;

	/*
	 * Wait for credits to resend this rdata.
	 * Note: we are attempting to resend the whole rdata not in segments
	 */
	do {
		rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

		if (rc)
			goto out;

		if (rsize < rdata->bytes) {
			/* not enough credits yet -- return them and retry */
			add_credits_and_wake_if(server, credits, 0);
			msleep(1000);
		}
	} while (rsize < rdata->bytes);

	rc = -EAGAIN;
	while (rc == -EAGAIN) {
		rc = 0;
		/* reopen a stale handle before reissuing the read */
		if (rdata->cfile->invalidHandle)
			rc = cifs_reopen_file(rdata->cfile, true);
		if (!rc)
			rc = server->ops->async_readv(rdata);
	}

	if (!rc) {
		/* Add to aio pending list */
		list_add_tail(&rdata->list, rdata_list);
		return 0;
	}

	add_credits_and_wake_if(server, rdata->credits, 0);
out:
	kref_put(&rdata->refcount,
		cifs_uncached_readdata_release);

	return rc;
}
3307
/*
 * Split [offset, offset + len) into one or more async reads on behalf of
 * an aio context. Each chunk is sized by the rsize credits granted by the
 * server, and each successfully issued rdata is queued on @rdata_list for
 * the completion/collection path. For direct I/O the user pages are pinned
 * via iov_iter_get_pages_alloc() and read into directly; otherwise pages
 * are allocated per rdata. Returns 0 or the last submission error; already
 * issued requests remain on @rdata_list either way.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize, credits;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* on a resend we may start mid-iterator: skip what's already done */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			/* pin the user pages for this chunk */
			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					"couldn't get user pages (rc=%zd)"
					" iter type %d"
					" iov_offset %zd count %zd\n",
					result, direct_iov.type,
					direct_iov.iov_offset,
					direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			/* last page holds whatever is left after the first
			 * (partial) page and the full middle pages */
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		/* reopen a stale handle before issuing the read */
		if (!rdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(rdata->cfile, true)))
			rc = server->ops->async_readv(rdata);
		if (rc) {
			add_credits_and_wake_if(server, rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* retry the same chunk from the same offset */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3434
/*
 * Collect the results of all completed uncached/direct reads attached to
 * an aio context: copy data into the destination iterator (non-direct
 * path), resend requests that failed with -EAGAIN, then drop page and
 * rdata references and complete the iocb (or the synchronous waiter).
 *
 * Called from every read completion under ctx->aio_mutex; if some rdata
 * is still in flight we return early and let a later completion re-enter
 * and finish the collection.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	unsigned int i;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				/* still in flight -- finish on a later call */
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	if (!ctx->direct_io) {
		/* release (and optionally dirty) the pinned destination pages */
		for (i = 0; i < ctx->npages; i++) {
			if (ctx->should_dirty)
				set_page_dirty(ctx->bv[i].bv_page);
			put_page(ctx->bv[i].bv_page);
		}

		ctx->total_len = ctx->len - iov_iter_count(to);
	}

	cifs_stats_bytes_read(tcon, ctx->total_len);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3551
/*
 * Common implementation of the uncached and O_DIRECT read paths. Builds a
 * cifs_aio_ctx, submits asynchronous reads via cifs_send_async_read(),
 * then either waits for completion (synchronous kiocb) or returns
 * -EIOCBQUEUED and lets the completion path finish the iocb.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && to->type & ITER_KVEC) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed iters get their pages dirtied when data lands */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: ki_complete will be called by the collection path */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting -- report what has arrived so far */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3654
/* Entry point for O_DIRECT reads */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}
3659
/* Entry point for uncached reads */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}
3664
/*
 * Read entry point for strict cache mode: use the generic (cached) read
 * path only when we hold a read-caching oplock and no mandatory brlock
 * conflicts with the requested range; otherwise read from the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX byte-range locks don't conflict with cached reads */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
3704
/*
 * Synchronous, copy-based read: read up to @read_size bytes at *@offset
 * into @read_data via the server's sync_read op, advancing *@offset as
 * data arrives. Returns the total number of bytes read, or a negative
 * error if nothing at all could be read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* retry the chunk until it stops failing with -EAGAIN */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): the stats call is fed the running
			 * total (before this chunk's bytes_read is added),
			 * not bytes_read -- confirm this is intended.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3795
/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static vm_fault_t
cifs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	/* return with the page still locked; VM_FAULT_LOCKED tells the VM */
	lock_page(page);
	return VM_FAULT_LOCKED;
}
3808
/* vm_ops installed by both cifs mmap variants; generic faults plus our
 * page_mkwrite to pin pages during writeback */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
3814
/*
 * mmap for strict cache mode: when we don't hold a read-caching oplock,
 * zap the existing page cache mapping before handing off to generic mmap.
 */
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int xid, rc = 0;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_CACHE_READ(CIFS_I(inode)))
		rc = cifs_zap_mapping(inode);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}
3832
/*
 * Default mmap: revalidate the file first; any revalidation failure
 * aborts the mmap with that error.
 */
int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();

	rc = cifs_revalidate_file(file);
	if (rc)
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}
3851
3852 static void
3853 cifs_readv_complete(struct work_struct *work)
3854 {
3855         unsigned int i, got_bytes;
3856         struct cifs_readdata *rdata = container_of(work,
3857                                                 struct cifs_readdata, work);
3858
3859         got_bytes = rdata->got_bytes;
3860         for (i = 0; i < rdata->nr_pages; i++) {
3861                 struct page *page = rdata->pages[i];
3862
3863                 lru_cache_add_file(page);
3864
3865                 if (rdata->result == 0 ||
3866                     (rdata->result == -EAGAIN && got_bytes)) {
3867                         flush_dcache_page(page);
3868                         SetPageUptodate(page);
3869                 }
3870
3871                 unlock_page(page);
3872
3873                 if (rdata->result == 0 ||
3874                     (rdata->result == -EAGAIN && got_bytes))
3875                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3876
3877                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3878
3879                 put_page(page);
3880                 rdata->pages[i] = NULL;
3881         }
3882         kref_put(&rdata->refcount, cifs_readdata_release);
3883 }
3884
/*
 * Fill readahead pagecache pages with up to @len bytes, from @iter when
 * the payload was already received, via RDMA accounting on SMB Direct,
 * or straight from the server socket otherwise. Pages past the data are
 * either zero-filled (when below the server's eof) or released.
 *
 * Returns the number of bytes placed in the pages, or a negative error;
 * a partial result wins over any error except -ECONNABORTED.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
3970
/* ->read_into_pages hook for readahead: receive data from the socket */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
3977
/*
 * Copy-side wrapper for the readpages path: fill rdata->pages by copying
 * from an already-received iov_iter (e.g. data that arrived in a
 * transform/decrypt buffer) rather than reading the socket.
 */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
3985
/*
 * Peel a run of index-contiguous pages off @page_list, insert them into
 * the page cache (locked), and move them onto @tmplist to form one read
 * request of at most @rsize bytes.
 *
 * On success returns 0 and sets *offset (file offset of the first page),
 * *bytes (total request length) and *nr_pages.  Returns the
 * add_to_page_cache_locked() error if even the first page cannot be
 * inserted.  Pages that would break contiguity, exceed rsize, or fail
 * cache insertion are simply left on @page_list for the next batch.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	/* page_list is in declining index order; the last entry is lowest */
	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		/* same lock-then-insert dance as for the first page */
		__SetPageLocked(page);
		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4044
/*
 * ->readpages() for the cached I/O path.
 *
 * The VFS hands us @page_list in order of declining index.  Pages that
 * fscache can satisfy are consumed first; the remainder are batched into
 * rsize-bounded runs by readpages_get_pages() and issued as async reads
 * (completed in cifs_readv_complete via the rdata callbacks set below).
 * Returns 0 on success or the first fatal error encountered.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		/* everything came from fscache; nothing left to read */
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		unsigned credits;

		/* blocks until the server grants credits for an rsize read */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits;

		/* tmplist is ascending by index, matching rdata->pages order */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		if (!rdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(rdata->cfile, true)))
			rc = server->ops->async_readv(rdata);
		if (rc) {
			/* undo page state; credits were consumed by rdata */
			add_credits_and_wake_if(server, rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our ref; completion path holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4181
4182 /*
4183  * cifs_readpage_worker must be called with the page pinned
4184  */
4185 static int cifs_readpage_worker(struct file *file, struct page *page,
4186         loff_t *poffset)
4187 {
4188         char *read_data;
4189         int rc;
4190
4191         /* Is the page cached? */
4192         rc = cifs_readpage_from_fscache(file_inode(file), page);
4193         if (rc == 0)
4194                 goto read_complete;
4195
4196         read_data = kmap(page);
4197         /* for reads over a certain size could initiate async read ahead */
4198
4199         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4200
4201         if (rc < 0)
4202                 goto io_error;
4203         else
4204                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4205
4206         /* we do not want atime to be less than mtime, it broke some apps */
4207         file_inode(file)->i_atime = current_time(file_inode(file));
4208         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4209                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4210         else
4211                 file_inode(file)->i_atime = current_time(file_inode(file));
4212
4213         if (PAGE_SIZE > rc)
4214                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4215
4216         flush_dcache_page(page);
4217         SetPageUptodate(page);
4218
4219         /* send this page to the cache */
4220         cifs_readpage_to_fscache(file_inode(file), page);
4221
4222         rc = 0;
4223
4224 io_error:
4225         kunmap(page);
4226         unlock_page(page);
4227
4228 read_complete:
4229         return rc;
4230 }
4231
4232 static int cifs_readpage(struct file *file, struct page *page)
4233 {
4234         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4235         int rc = -EACCES;
4236         unsigned int xid;
4237
4238         xid = get_xid();
4239
4240         if (file->private_data == NULL) {
4241                 rc = -EBADF;
4242                 free_xid(xid);
4243                 return rc;
4244         }
4245
4246         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4247                  page, (int)offset, (int)offset);
4248
4249         rc = cifs_readpage_worker(file, page, &offset);
4250
4251         free_xid(xid);
4252         return rc;
4253 }
4254
4255 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4256 {
4257         struct cifsFileInfo *open_file;
4258         struct cifs_tcon *tcon =
4259                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4260
4261         spin_lock(&tcon->open_file_lock);
4262         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4263                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4264                         spin_unlock(&tcon->open_file_lock);
4265                         return 1;
4266                 }
4267         }
4268         spin_unlock(&tcon->open_file_lock);
4269         return 0;
4270 }
4271
4272 /* We do not want to update the file size from server for inodes
4273    open for write - to avoid races with writepage extending
4274    the file - in the future we could consider allowing
4275    refreshing the inode only on increases in the file size
4276    but this is tricky to do without racing with writebehind
4277    page caching in the current Linux kernel design */
4278 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4279 {
4280         if (!cifsInode)
4281                 return true;
4282
4283         if (is_inode_writable(cifsInode)) {
4284                 /* This inode is open for write at least once */
4285                 struct cifs_sb_info *cifs_sb;
4286
4287                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4288                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4289                         /* since no page cache to corrupt on directio
4290                         we can change size safely */
4291                         return true;
4292                 }
4293
4294                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4295                         return true;
4296
4297                 return false;
4298         } else
4299                 return true;
4300 }
4301
/*
 * ->write_begin() for the cached I/O path.
 *
 * Grab and lock the page cache page covering @pos and try to make it
 * uptodate before the caller copies in @len bytes:
 *  - already-uptodate or full-page writes need no read-in;
 *  - with a read oplock, a page at or straddling EOF that the write
 *    will fully cover is zeroed and marked PageChecked instead;
 *  - otherwise the existing contents are read in via
 *    cifs_readpage_worker (one retry only, tracked by @oncethru).
 * Returns 0 with *pagep set to the locked page, or -ENOMEM.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		/* worker unlocked the page; drop our ref and re-grab it */
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4378
4379 static int cifs_release_page(struct page *page, gfp_t gfp)
4380 {
4381         if (PagePrivate(page))
4382                 return 0;
4383
4384         return cifs_fscache_release_page(page, gfp);
4385 }
4386
4387 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4388                                  unsigned int length)
4389 {
4390         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4391
4392         if (offset == 0 && length == PAGE_SIZE)
4393                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4394 }
4395
4396 static int cifs_launder_page(struct page *page)
4397 {
4398         int rc = 0;
4399         loff_t range_start = page_offset(page);
4400         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4401         struct writeback_control wbc = {
4402                 .sync_mode = WB_SYNC_ALL,
4403                 .nr_to_write = 0,
4404                 .range_start = range_start,
4405                 .range_end = range_end,
4406         };
4407
4408         cifs_dbg(FYI, "Launder page: %p\n", page);
4409
4410         if (clear_page_dirty_for_io(page))
4411                 rc = cifs_writepage_locked(page, &wbc);
4412
4413         cifs_fscache_invalidate_page(page, page->mapping->host);
4414         return rc;
4415 }
4416
/*
 * Work item run when the server breaks an oplock on @cfile.
 *
 * Ordering matters here: wait for in-flight writers, downgrade the
 * cached oplock state, break any local lease, flush (and, if read
 * caching was lost, invalidate) the page cache, push cached byte-range
 * locks to the server, and finally acknowledge the break — unless the
 * break was cancelled (e.g. the session reconnected, so the handle the
 * ack would use is stale anyway).
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;

	/* let pending writers finish before changing caching state */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

	/* mandatory locks need write caching; drop the oplock entirely */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode)) {
			/* read caching lost: cached pages may go stale */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	cifs_done_oplock_break(cinode);
}
4471
4472 /*
4473  * The presence of cifs_direct_io() in the address space ops vector
4474  * allowes open() O_DIRECT flags which would have failed otherwise.
4475  *
4476  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4477  * so this method should never be called.
4478  *
4479  * Direct IO is not yet supported in the cached mode. 
4480  */
4481 static ssize_t
4482 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4483 {
4484         /*
4485          * FIXME
4486          * Eventually need to support direct IO for non forcedirectio mounts
4487          */
4488         return -EINVAL;
4489 }
4490
4491
/*
 * Address space operations for the normal (page-cache backed) case,
 * used when the server buffer is large enough for full-page reads.
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
4505
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 *
 * NOTE(review): compared with cifs_addr_ops this table also omits
 * .direct_IO — presumably so O_DIRECT opens fail on small-buffer
 * connections; confirm that this omission is intentional.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};