92fdf9c35de2153fd6f008c4d1306ead6d806c89
[sfrench/cifs-2.6.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114                         struct super_block *sb, int mode, unsigned int f_flags,
115                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
116 {
117         int rc;
118         FILE_UNIX_BASIC_INFO *presp_data;
119         __u32 posix_flags = 0;
120         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121         struct cifs_fattr fattr;
122         struct tcon_link *tlink;
123         struct cifs_tcon *tcon;
124
125         cifs_dbg(FYI, "posix open %s\n", full_path);
126
127         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128         if (presp_data == NULL)
129                 return -ENOMEM;
130
131         tlink = cifs_sb_tlink(cifs_sb);
132         if (IS_ERR(tlink)) {
133                 rc = PTR_ERR(tlink);
134                 goto posix_open_ret;
135         }
136
137         tcon = tlink_tcon(tlink);
138         mode &= ~current_umask();
139
140         posix_flags = cifs_posix_convert_flags(f_flags);
141         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142                              poplock, full_path, cifs_sb->local_nls,
143                              cifs_remap(cifs_sb));
144         cifs_put_tlink(tlink);
145
146         if (rc)
147                 goto posix_open_ret;
148
149         if (presp_data->Type == cpu_to_le32(-1))
150                 goto posix_open_ret; /* open ok, caller does qpathinfo */
151
152         if (!pinode)
153                 goto posix_open_ret; /* caller does not need info */
154
155         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
156
157         /* get new inode and set it up */
158         if (*pinode == NULL) {
159                 cifs_fill_uniqueid(sb, &fattr);
160                 *pinode = cifs_iget(sb, &fattr);
161                 if (!*pinode) {
162                         rc = -ENOMEM;
163                         goto posix_open_ret;
164                 }
165         } else {
166                 cifs_fattr_to_inode(*pinode, &fattr);
167         }
168
169 posix_open_ret:
170         kfree(presp_data);
171         return rc;
172 }
173
174 static int
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177              struct cifs_fid *fid, unsigned int xid)
178 {
179         int rc;
180         int desired_access;
181         int disposition;
182         int create_options = CREATE_NOT_DIR;
183         FILE_ALL_INFO *buf;
184         struct TCP_Server_Info *server = tcon->ses->server;
185         struct cifs_open_parms oparms;
186
187         if (!server->ops->open)
188                 return -ENOSYS;
189
190         desired_access = cifs_convert_flags(f_flags);
191
192 /*********************************************************************
193  *  open flag mapping table:
194  *
195  *      POSIX Flag            CIFS Disposition
196  *      ----------            ----------------
197  *      O_CREAT               FILE_OPEN_IF
198  *      O_CREAT | O_EXCL      FILE_CREATE
199  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
200  *      O_TRUNC               FILE_OVERWRITE
201  *      none of the above     FILE_OPEN
202  *
203  *      Note that there is not a direct match between disposition
204  *      FILE_SUPERSEDE (ie create whether or not file exists although
205  *      O_CREAT | O_TRUNC is similar but truncates the existing
206  *      file rather than creating a new file as FILE_SUPERSEDE does
207  *      (which uses the attributes / metadata passed in on open call)
208  *?
209  *?  O_SYNC is a reasonable match to CIFS writethrough flag
210  *?  and the read write flags match reasonably.  O_LARGEFILE
211  *?  is irrelevant because largefile support is always used
212  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214  *********************************************************************/
215
216         disposition = cifs_get_disposition(f_flags);
217
218         /* BB pass O_SYNC flag through on file attributes .. BB */
219
220         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
221         if (!buf)
222                 return -ENOMEM;
223
224         if (backup_cred(cifs_sb))
225                 create_options |= CREATE_OPEN_BACKUP_INTENT;
226
227         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228         if (f_flags & O_SYNC)
229                 create_options |= CREATE_WRITE_THROUGH;
230
231         if (f_flags & O_DIRECT)
232                 create_options |= CREATE_NO_BUFFER;
233
234         oparms.tcon = tcon;
235         oparms.cifs_sb = cifs_sb;
236         oparms.desired_access = desired_access;
237         oparms.create_options = create_options;
238         oparms.disposition = disposition;
239         oparms.path = full_path;
240         oparms.fid = fid;
241         oparms.reconnect = false;
242
243         rc = server->ops->open(xid, &oparms, oplock, buf);
244
245         if (rc)
246                 goto out;
247
248         if (tcon->unix_ext)
249                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
250                                               xid);
251         else
252                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
253                                          xid, fid);
254
255 out:
256         kfree(buf);
257         return rc;
258 }
259
260 static bool
261 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
262 {
263         struct cifs_fid_locks *cur;
264         bool has_locks = false;
265
266         down_read(&cinode->lock_sem);
267         list_for_each_entry(cur, &cinode->llist, llist) {
268                 if (!list_empty(&cur->locks)) {
269                         has_locks = true;
270                         break;
271                 }
272         }
273         up_read(&cinode->lock_sem);
274         return has_locks;
275 }
276
277 struct cifsFileInfo *
278 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
279                   struct tcon_link *tlink, __u32 oplock)
280 {
281         struct dentry *dentry = file_dentry(file);
282         struct inode *inode = d_inode(dentry);
283         struct cifsInodeInfo *cinode = CIFS_I(inode);
284         struct cifsFileInfo *cfile;
285         struct cifs_fid_locks *fdlocks;
286         struct cifs_tcon *tcon = tlink_tcon(tlink);
287         struct TCP_Server_Info *server = tcon->ses->server;
288
289         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
290         if (cfile == NULL)
291                 return cfile;
292
293         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
294         if (!fdlocks) {
295                 kfree(cfile);
296                 return NULL;
297         }
298
299         INIT_LIST_HEAD(&fdlocks->locks);
300         fdlocks->cfile = cfile;
301         cfile->llist = fdlocks;
302         down_write(&cinode->lock_sem);
303         list_add(&fdlocks->llist, &cinode->llist);
304         up_write(&cinode->lock_sem);
305
306         cfile->count = 1;
307         cfile->pid = current->tgid;
308         cfile->uid = current_fsuid();
309         cfile->dentry = dget(dentry);
310         cfile->f_flags = file->f_flags;
311         cfile->invalidHandle = false;
312         cfile->tlink = cifs_get_tlink(tlink);
313         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
314         mutex_init(&cfile->fh_mutex);
315         spin_lock_init(&cfile->file_info_lock);
316
317         cifs_sb_active(inode->i_sb);
318
319         /*
320          * If the server returned a read oplock and we have mandatory brlocks,
321          * set oplock level to None.
322          */
323         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
324                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
325                 oplock = 0;
326         }
327
328         spin_lock(&tcon->open_file_lock);
329         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
330                 oplock = fid->pending_open->oplock;
331         list_del(&fid->pending_open->olist);
332
333         fid->purge_cache = false;
334         server->ops->set_fid(cfile, fid, oplock);
335
336         list_add(&cfile->tlist, &tcon->openFileList);
337
338         /* if readable file instance put first in list*/
339         if (file->f_mode & FMODE_READ)
340                 list_add(&cfile->flist, &cinode->openFileList);
341         else
342                 list_add_tail(&cfile->flist, &cinode->openFileList);
343         spin_unlock(&tcon->open_file_lock);
344
345         if (fid->purge_cache)
346                 cifs_zap_mapping(inode);
347
348         file->private_data = cfile;
349         return cfile;
350 }
351
352 struct cifsFileInfo *
353 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
354 {
355         spin_lock(&cifs_file->file_info_lock);
356         cifsFileInfo_get_locked(cifs_file);
357         spin_unlock(&cifs_file->file_info_lock);
358         return cifs_file;
359 }
360
361 /*
362  * Release a reference on the file private data. This may involve closing
363  * the filehandle out on the server. Must be called without holding
364  * tcon->open_file_lock and cifs_file->file_info_lock.
365  */
366 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
367 {
368         struct inode *inode = d_inode(cifs_file->dentry);
369         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
370         struct TCP_Server_Info *server = tcon->ses->server;
371         struct cifsInodeInfo *cifsi = CIFS_I(inode);
372         struct super_block *sb = inode->i_sb;
373         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
374         struct cifsLockInfo *li, *tmp;
375         struct cifs_fid fid;
376         struct cifs_pending_open open;
377         bool oplock_break_cancelled;
378
379         spin_lock(&tcon->open_file_lock);
380
381         spin_lock(&cifs_file->file_info_lock);
382         if (--cifs_file->count > 0) {
383                 spin_unlock(&cifs_file->file_info_lock);
384                 spin_unlock(&tcon->open_file_lock);
385                 return;
386         }
387         spin_unlock(&cifs_file->file_info_lock);
388
389         if (server->ops->get_lease_key)
390                 server->ops->get_lease_key(inode, &fid);
391
392         /* store open in pending opens to make sure we don't miss lease break */
393         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
394
395         /* remove it from the lists */
396         list_del(&cifs_file->flist);
397         list_del(&cifs_file->tlist);
398
399         if (list_empty(&cifsi->openFileList)) {
400                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
401                          d_inode(cifs_file->dentry));
402                 /*
403                  * In strict cache mode we need invalidate mapping on the last
404                  * close  because it may cause a error when we open this file
405                  * again and get at least level II oplock.
406                  */
407                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
408                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
409                 cifs_set_oplock_level(cifsi, 0);
410         }
411
412         spin_unlock(&tcon->open_file_lock);
413
414         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
415
416         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
417                 struct TCP_Server_Info *server = tcon->ses->server;
418                 unsigned int xid;
419
420                 xid = get_xid();
421                 if (server->ops->close)
422                         server->ops->close(xid, tcon, &cifs_file->fid);
423                 _free_xid(xid);
424         }
425
426         if (oplock_break_cancelled)
427                 cifs_done_oplock_break(cifsi);
428
429         cifs_del_pending_open(&open);
430
431         /*
432          * Delete any outstanding lock records. We'll lose them when the file
433          * is closed anyway.
434          */
435         down_write(&cifsi->lock_sem);
436         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
437                 list_del(&li->llist);
438                 cifs_del_lock_waiters(li);
439                 kfree(li);
440         }
441         list_del(&cifs_file->llist->llist);
442         kfree(cifs_file->llist);
443         up_write(&cifsi->lock_sem);
444
445         cifs_put_tlink(cifs_file->tlink);
446         dput(cifs_file->dentry);
447         cifs_sb_deactive(sb);
448         kfree(cifs_file);
449 }
450
451 int cifs_open(struct inode *inode, struct file *file)
452
453 {
454         int rc = -EACCES;
455         unsigned int xid;
456         __u32 oplock;
457         struct cifs_sb_info *cifs_sb;
458         struct TCP_Server_Info *server;
459         struct cifs_tcon *tcon;
460         struct tcon_link *tlink;
461         struct cifsFileInfo *cfile = NULL;
462         char *full_path = NULL;
463         bool posix_open_ok = false;
464         struct cifs_fid fid;
465         struct cifs_pending_open open;
466
467         xid = get_xid();
468
469         cifs_sb = CIFS_SB(inode->i_sb);
470         tlink = cifs_sb_tlink(cifs_sb);
471         if (IS_ERR(tlink)) {
472                 free_xid(xid);
473                 return PTR_ERR(tlink);
474         }
475         tcon = tlink_tcon(tlink);
476         server = tcon->ses->server;
477
478         full_path = build_path_from_dentry(file_dentry(file));
479         if (full_path == NULL) {
480                 rc = -ENOMEM;
481                 goto out;
482         }
483
484         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
485                  inode, file->f_flags, full_path);
486
487         if (file->f_flags & O_DIRECT &&
488             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
489                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
490                         file->f_op = &cifs_file_direct_nobrl_ops;
491                 else
492                         file->f_op = &cifs_file_direct_ops;
493         }
494
495         if (server->oplocks)
496                 oplock = REQ_OPLOCK;
497         else
498                 oplock = 0;
499
500         if (!tcon->broken_posix_open && tcon->unix_ext &&
501             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
502                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
503                 /* can not refresh inode info since size could be stale */
504                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
505                                 cifs_sb->mnt_file_mode /* ignored */,
506                                 file->f_flags, &oplock, &fid.netfid, xid);
507                 if (rc == 0) {
508                         cifs_dbg(FYI, "posix open succeeded\n");
509                         posix_open_ok = true;
510                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
511                         if (tcon->ses->serverNOS)
512                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
513                                          tcon->ses->serverName,
514                                          tcon->ses->serverNOS);
515                         tcon->broken_posix_open = true;
516                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
517                          (rc != -EOPNOTSUPP)) /* path not found or net err */
518                         goto out;
519                 /*
520                  * Else fallthrough to retry open the old way on network i/o
521                  * or DFS errors.
522                  */
523         }
524
525         if (server->ops->get_lease_key)
526                 server->ops->get_lease_key(inode, &fid);
527
528         cifs_add_pending_open(&fid, tlink, &open);
529
530         if (!posix_open_ok) {
531                 if (server->ops->get_lease_key)
532                         server->ops->get_lease_key(inode, &fid);
533
534                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
535                                   file->f_flags, &oplock, &fid, xid);
536                 if (rc) {
537                         cifs_del_pending_open(&open);
538                         goto out;
539                 }
540         }
541
542         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
543         if (cfile == NULL) {
544                 if (server->ops->close)
545                         server->ops->close(xid, tcon, &fid);
546                 cifs_del_pending_open(&open);
547                 rc = -ENOMEM;
548                 goto out;
549         }
550
551         cifs_fscache_set_inode_cookie(inode, file);
552
553         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
554                 /*
555                  * Time to set mode which we can not set earlier due to
556                  * problems creating new read-only files.
557                  */
558                 struct cifs_unix_set_info_args args = {
559                         .mode   = inode->i_mode,
560                         .uid    = INVALID_UID, /* no change */
561                         .gid    = INVALID_GID, /* no change */
562                         .ctime  = NO_CHANGE_64,
563                         .atime  = NO_CHANGE_64,
564                         .mtime  = NO_CHANGE_64,
565                         .device = 0,
566                 };
567                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
568                                        cfile->pid);
569         }
570
571 out:
572         kfree(full_path);
573         free_xid(xid);
574         cifs_put_tlink(tlink);
575         return rc;
576 }
577
578 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
579
580 /*
581  * Try to reacquire byte range locks that were released when session
582  * to server was lost.
583  */
584 static int
585 cifs_relock_file(struct cifsFileInfo *cfile)
586 {
587         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
588         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
589         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
590         int rc = 0;
591
592         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
593         if (cinode->can_cache_brlcks) {
594                 /* can cache locks - no need to relock */
595                 up_read(&cinode->lock_sem);
596                 return rc;
597         }
598
599         if (cap_unix(tcon->ses) &&
600             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
601             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
602                 rc = cifs_push_posix_locks(cfile);
603         else
604                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
605
606         up_read(&cinode->lock_sem);
607         return rc;
608 }
609
610 static int
611 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
612 {
613         int rc = -EACCES;
614         unsigned int xid;
615         __u32 oplock;
616         struct cifs_sb_info *cifs_sb;
617         struct cifs_tcon *tcon;
618         struct TCP_Server_Info *server;
619         struct cifsInodeInfo *cinode;
620         struct inode *inode;
621         char *full_path = NULL;
622         int desired_access;
623         int disposition = FILE_OPEN;
624         int create_options = CREATE_NOT_DIR;
625         struct cifs_open_parms oparms;
626
627         xid = get_xid();
628         mutex_lock(&cfile->fh_mutex);
629         if (!cfile->invalidHandle) {
630                 mutex_unlock(&cfile->fh_mutex);
631                 rc = 0;
632                 free_xid(xid);
633                 return rc;
634         }
635
636         inode = d_inode(cfile->dentry);
637         cifs_sb = CIFS_SB(inode->i_sb);
638         tcon = tlink_tcon(cfile->tlink);
639         server = tcon->ses->server;
640
641         /*
642          * Can not grab rename sem here because various ops, including those
643          * that already have the rename sem can end up causing writepage to get
644          * called and if the server was down that means we end up here, and we
645          * can never tell if the caller already has the rename_sem.
646          */
647         full_path = build_path_from_dentry(cfile->dentry);
648         if (full_path == NULL) {
649                 rc = -ENOMEM;
650                 mutex_unlock(&cfile->fh_mutex);
651                 free_xid(xid);
652                 return rc;
653         }
654
655         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
656                  inode, cfile->f_flags, full_path);
657
658         if (tcon->ses->server->oplocks)
659                 oplock = REQ_OPLOCK;
660         else
661                 oplock = 0;
662
663         if (tcon->unix_ext && cap_unix(tcon->ses) &&
664             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
665                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
666                 /*
667                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
668                  * original open. Must mask them off for a reopen.
669                  */
670                 unsigned int oflags = cfile->f_flags &
671                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
672
673                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
674                                      cifs_sb->mnt_file_mode /* ignored */,
675                                      oflags, &oplock, &cfile->fid.netfid, xid);
676                 if (rc == 0) {
677                         cifs_dbg(FYI, "posix reopen succeeded\n");
678                         oparms.reconnect = true;
679                         goto reopen_success;
680                 }
681                 /*
682                  * fallthrough to retry open the old way on errors, especially
683                  * in the reconnect path it is important to retry hard
684                  */
685         }
686
687         desired_access = cifs_convert_flags(cfile->f_flags);
688
689         if (backup_cred(cifs_sb))
690                 create_options |= CREATE_OPEN_BACKUP_INTENT;
691
692         if (server->ops->get_lease_key)
693                 server->ops->get_lease_key(inode, &cfile->fid);
694
695         oparms.tcon = tcon;
696         oparms.cifs_sb = cifs_sb;
697         oparms.desired_access = desired_access;
698         oparms.create_options = create_options;
699         oparms.disposition = disposition;
700         oparms.path = full_path;
701         oparms.fid = &cfile->fid;
702         oparms.reconnect = true;
703
704         /*
705          * Can not refresh inode by passing in file_info buf to be returned by
706          * ops->open and then calling get_inode_info with returned buf since
707          * file might have write behind data that needs to be flushed and server
708          * version of file size can be stale. If we knew for sure that inode was
709          * not dirty locally we could do this.
710          */
711         rc = server->ops->open(xid, &oparms, &oplock, NULL);
712         if (rc == -ENOENT && oparms.reconnect == false) {
713                 /* durable handle timeout is expired - open the file again */
714                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715                 /* indicate that we need to relock the file */
716                 oparms.reconnect = true;
717         }
718
719         if (rc) {
720                 mutex_unlock(&cfile->fh_mutex);
721                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
722                 cifs_dbg(FYI, "oplock: %d\n", oplock);
723                 goto reopen_error_exit;
724         }
725
726 reopen_success:
727         cfile->invalidHandle = false;
728         mutex_unlock(&cfile->fh_mutex);
729         cinode = CIFS_I(inode);
730
731         if (can_flush) {
732                 rc = filemap_write_and_wait(inode->i_mapping);
733                 mapping_set_error(inode->i_mapping, rc);
734
735                 if (tcon->unix_ext)
736                         rc = cifs_get_inode_info_unix(&inode, full_path,
737                                                       inode->i_sb, xid);
738                 else
739                         rc = cifs_get_inode_info(&inode, full_path, NULL,
740                                                  inode->i_sb, xid, NULL);
741         }
742         /*
743          * Else we are writing out data to server already and could deadlock if
744          * we tried to flush data, and since we do not know if we have data that
745          * would invalidate the current end of file on the server we can not go
746          * to the server to get the new inode info.
747          */
748
749         /*
750          * If the server returned a read oplock and we have mandatory brlocks,
751          * set oplock level to None.
752          */
753         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
754                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
755                 oplock = 0;
756         }
757
758         server->ops->set_fid(cfile, &cfile->fid, oplock);
759         if (oparms.reconnect)
760                 cifs_relock_file(cfile);
761
762 reopen_error_exit:
763         kfree(full_path);
764         free_xid(xid);
765         return rc;
766 }
767
768 int cifs_close(struct inode *inode, struct file *file)
769 {
770         if (file->private_data != NULL) {
771                 cifsFileInfo_put(file->private_data);
772                 file->private_data = NULL;
773         }
774
775         /* return code from the ->release op is always ignored */
776         return 0;
777 }
778
779 void
780 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
781 {
782         struct cifsFileInfo *open_file;
783         struct list_head *tmp;
784         struct list_head *tmp1;
785         struct list_head tmp_list;
786
787         if (!tcon->use_persistent || !tcon->need_reopen_files)
788                 return;
789
790         tcon->need_reopen_files = false;
791
792         cifs_dbg(FYI, "Reopen persistent handles");
793         INIT_LIST_HEAD(&tmp_list);
794
795         /* list all files open on tree connection, reopen resilient handles  */
796         spin_lock(&tcon->open_file_lock);
797         list_for_each(tmp, &tcon->openFileList) {
798                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
799                 if (!open_file->invalidHandle)
800                         continue;
801                 cifsFileInfo_get(open_file);
802                 list_add_tail(&open_file->rlist, &tmp_list);
803         }
804         spin_unlock(&tcon->open_file_lock);
805
806         list_for_each_safe(tmp, tmp1, &tmp_list) {
807                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
808                 if (cifs_reopen_file(open_file, false /* do not flush */))
809                         tcon->need_reopen_files = true;
810                 list_del_init(&open_file->rlist);
811                 cifsFileInfo_put(open_file);
812         }
813 }
814
815 int cifs_closedir(struct inode *inode, struct file *file)
816 {
817         int rc = 0;
818         unsigned int xid;
819         struct cifsFileInfo *cfile = file->private_data;
820         struct cifs_tcon *tcon;
821         struct TCP_Server_Info *server;
822         char *buf;
823
824         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
825
826         if (cfile == NULL)
827                 return rc;
828
829         xid = get_xid();
830         tcon = tlink_tcon(cfile->tlink);
831         server = tcon->ses->server;
832
833         cifs_dbg(FYI, "Freeing private data in close dir\n");
834         spin_lock(&cfile->file_info_lock);
835         if (server->ops->dir_needs_close(cfile)) {
836                 cfile->invalidHandle = true;
837                 spin_unlock(&cfile->file_info_lock);
838                 if (server->ops->close_dir)
839                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
840                 else
841                         rc = -ENOSYS;
842                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
843                 /* not much we can do if it fails anyway, ignore rc */
844                 rc = 0;
845         } else
846                 spin_unlock(&cfile->file_info_lock);
847
848         buf = cfile->srch_inf.ntwrk_buf_start;
849         if (buf) {
850                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
851                 cfile->srch_inf.ntwrk_buf_start = NULL;
852                 if (cfile->srch_inf.smallBuf)
853                         cifs_small_buf_release(buf);
854                 else
855                         cifs_buf_release(buf);
856         }
857
858         cifs_put_tlink(cfile->tlink);
859         kfree(file->private_data);
860         file->private_data = NULL;
861         /* BB can we lock the filestruct while this is going on? */
862         free_xid(xid);
863         return rc;
864 }
865
866 static struct cifsLockInfo *
867 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
868 {
869         struct cifsLockInfo *lock =
870                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
871         if (!lock)
872                 return lock;
873         lock->offset = offset;
874         lock->length = length;
875         lock->type = type;
876         lock->pid = current->tgid;
877         INIT_LIST_HEAD(&lock->blist);
878         init_waitqueue_head(&lock->block_q);
879         return lock;
880 }
881
882 void
883 cifs_del_lock_waiters(struct cifsLockInfo *lock)
884 {
885         struct cifsLockInfo *li, *tmp;
886         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
887                 list_del_init(&li->blist);
888                 wake_up(&li->block_q);
889         }
890 }
891
892 #define CIFS_LOCK_OP    0
893 #define CIFS_READ_OP    1
894 #define CIFS_WRITE_OP   2
895
896 /* @rw_check : 0 - no op, 1 - read, 2 - write */
897 static bool
898 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
899                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
900                             struct cifsLockInfo **conf_lock, int rw_check)
901 {
902         struct cifsLockInfo *li;
903         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
904         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
905
906         list_for_each_entry(li, &fdlocks->locks, llist) {
907                 if (offset + length <= li->offset ||
908                     offset >= li->offset + li->length)
909                         continue;
910                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
911                     server->ops->compare_fids(cfile, cur_cfile)) {
912                         /* shared lock prevents write op through the same fid */
913                         if (!(li->type & server->vals->shared_lock_type) ||
914                             rw_check != CIFS_WRITE_OP)
915                                 continue;
916                 }
917                 if ((type & server->vals->shared_lock_type) &&
918                     ((server->ops->compare_fids(cfile, cur_cfile) &&
919                      current->tgid == li->pid) || type == li->type))
920                         continue;
921                 if (conf_lock)
922                         *conf_lock = li;
923                 return true;
924         }
925         return false;
926 }
927
928 bool
929 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
930                         __u8 type, struct cifsLockInfo **conf_lock,
931                         int rw_check)
932 {
933         bool rc = false;
934         struct cifs_fid_locks *cur;
935         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
936
937         list_for_each_entry(cur, &cinode->llist, llist) {
938                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
939                                                  cfile, conf_lock, rw_check);
940                 if (rc)
941                         break;
942         }
943
944         return rc;
945 }
946
947 /*
948  * Check if there is another lock that prevents us to set the lock (mandatory
949  * style). If such a lock exists, update the flock structure with its
950  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
951  * or leave it the same if we can't. Returns 0 if we don't need to request to
952  * the server or 1 otherwise.
953  */
954 static int
955 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
956                __u8 type, struct file_lock *flock)
957 {
958         int rc = 0;
959         struct cifsLockInfo *conf_lock;
960         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
961         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
962         bool exist;
963
964         down_read(&cinode->lock_sem);
965
966         exist = cifs_find_lock_conflict(cfile, offset, length, type,
967                                         &conf_lock, CIFS_LOCK_OP);
968         if (exist) {
969                 flock->fl_start = conf_lock->offset;
970                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
971                 flock->fl_pid = conf_lock->pid;
972                 if (conf_lock->type & server->vals->shared_lock_type)
973                         flock->fl_type = F_RDLCK;
974                 else
975                         flock->fl_type = F_WRLCK;
976         } else if (!cinode->can_cache_brlcks)
977                 rc = 1;
978         else
979                 flock->fl_type = F_UNLCK;
980
981         up_read(&cinode->lock_sem);
982         return rc;
983 }
984
985 static void
986 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
987 {
988         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
989         down_write(&cinode->lock_sem);
990         list_add_tail(&lock->llist, &cfile->llist->locks);
991         up_write(&cinode->lock_sem);
992 }
993
994 /*
995  * Set the byte-range lock (mandatory style). Returns:
996  * 1) 0, if we set the lock and don't need to request to the server;
997  * 2) 1, if no locks prevent us but we need to request to the server;
998  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
999  */
1000 static int
1001 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1002                  bool wait)
1003 {
1004         struct cifsLockInfo *conf_lock;
1005         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1006         bool exist;
1007         int rc = 0;
1008
1009 try_again:
1010         exist = false;
1011         down_write(&cinode->lock_sem);
1012
1013         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1014                                         lock->type, &conf_lock, CIFS_LOCK_OP);
1015         if (!exist && cinode->can_cache_brlcks) {
1016                 list_add_tail(&lock->llist, &cfile->llist->locks);
1017                 up_write(&cinode->lock_sem);
1018                 return rc;
1019         }
1020
1021         if (!exist)
1022                 rc = 1;
1023         else if (!wait)
1024                 rc = -EACCES;
1025         else {
1026                 list_add_tail(&lock->blist, &conf_lock->blist);
1027                 up_write(&cinode->lock_sem);
1028                 rc = wait_event_interruptible(lock->block_q,
1029                                         (lock->blist.prev == &lock->blist) &&
1030                                         (lock->blist.next == &lock->blist));
1031                 if (!rc)
1032                         goto try_again;
1033                 down_write(&cinode->lock_sem);
1034                 list_del_init(&lock->blist);
1035         }
1036
1037         up_write(&cinode->lock_sem);
1038         return rc;
1039 }
1040
1041 /*
1042  * Check if there is another lock that prevents us to set the lock (posix
1043  * style). If such a lock exists, update the flock structure with its
1044  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1045  * or leave it the same if we can't. Returns 0 if we don't need to request to
1046  * the server or 1 otherwise.
1047  */
1048 static int
1049 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1050 {
1051         int rc = 0;
1052         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1053         unsigned char saved_type = flock->fl_type;
1054
1055         if ((flock->fl_flags & FL_POSIX) == 0)
1056                 return 1;
1057
1058         down_read(&cinode->lock_sem);
1059         posix_test_lock(file, flock);
1060
1061         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1062                 flock->fl_type = saved_type;
1063                 rc = 1;
1064         }
1065
1066         up_read(&cinode->lock_sem);
1067         return rc;
1068 }
1069
1070 /*
1071  * Set the byte-range lock (posix style). Returns:
1072  * 1) 0, if we set the lock and don't need to request to the server;
1073  * 2) 1, if we need to request to the server;
1074  * 3) <0, if the error occurs while setting the lock.
1075  */
1076 static int
1077 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1078 {
1079         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1080         int rc = 1;
1081
1082         if ((flock->fl_flags & FL_POSIX) == 0)
1083                 return rc;
1084
1085 try_again:
1086         down_write(&cinode->lock_sem);
1087         if (!cinode->can_cache_brlcks) {
1088                 up_write(&cinode->lock_sem);
1089                 return rc;
1090         }
1091
1092         rc = posix_lock_file(file, flock, NULL);
1093         up_write(&cinode->lock_sem);
1094         if (rc == FILE_LOCK_DEFERRED) {
1095                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1096                 if (!rc)
1097                         goto try_again;
1098                 posix_unblock_lock(flock);
1099         }
1100         return rc;
1101 }
1102
1103 int
1104 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1105 {
1106         unsigned int xid;
1107         int rc = 0, stored_rc;
1108         struct cifsLockInfo *li, *tmp;
1109         struct cifs_tcon *tcon;
1110         unsigned int num, max_num, max_buf;
1111         LOCKING_ANDX_RANGE *buf, *cur;
1112         static const int types[] = {
1113                 LOCKING_ANDX_LARGE_FILES,
1114                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1115         };
1116         int i;
1117
1118         xid = get_xid();
1119         tcon = tlink_tcon(cfile->tlink);
1120
1121         /*
1122          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1123          * and check it for zero before using.
1124          */
1125         max_buf = tcon->ses->server->maxBuf;
1126         if (!max_buf) {
1127                 free_xid(xid);
1128                 return -EINVAL;
1129         }
1130
1131         max_num = (max_buf - sizeof(struct smb_hdr)) /
1132                                                 sizeof(LOCKING_ANDX_RANGE);
1133         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1134         if (!buf) {
1135                 free_xid(xid);
1136                 return -ENOMEM;
1137         }
1138
1139         for (i = 0; i < 2; i++) {
1140                 cur = buf;
1141                 num = 0;
1142                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1143                         if (li->type != types[i])
1144                                 continue;
1145                         cur->Pid = cpu_to_le16(li->pid);
1146                         cur->LengthLow = cpu_to_le32((u32)li->length);
1147                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1148                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1149                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1150                         if (++num == max_num) {
1151                                 stored_rc = cifs_lockv(xid, tcon,
1152                                                        cfile->fid.netfid,
1153                                                        (__u8)li->type, 0, num,
1154                                                        buf);
1155                                 if (stored_rc)
1156                                         rc = stored_rc;
1157                                 cur = buf;
1158                                 num = 0;
1159                         } else
1160                                 cur++;
1161                 }
1162
1163                 if (num) {
1164                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1165                                                (__u8)types[i], 0, num, buf);
1166                         if (stored_rc)
1167                                 rc = stored_rc;
1168                 }
1169         }
1170
1171         kfree(buf);
1172         free_xid(xid);
1173         return rc;
1174 }
1175
1176 static __u32
1177 hash_lockowner(fl_owner_t owner)
1178 {
1179         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1180 }
1181
1182 struct lock_to_push {
1183         struct list_head llist;
1184         __u64 offset;
1185         __u64 length;
1186         __u32 pid;
1187         __u16 netfid;
1188         __u8 type;
1189 };
1190
1191 static int
1192 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1193 {
1194         struct inode *inode = d_inode(cfile->dentry);
1195         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1196         struct file_lock *flock;
1197         struct file_lock_context *flctx = inode->i_flctx;
1198         unsigned int count = 0, i;
1199         int rc = 0, xid, type;
1200         struct list_head locks_to_send, *el;
1201         struct lock_to_push *lck, *tmp;
1202         __u64 length;
1203
1204         xid = get_xid();
1205
1206         if (!flctx)
1207                 goto out;
1208
1209         spin_lock(&flctx->flc_lock);
1210         list_for_each(el, &flctx->flc_posix) {
1211                 count++;
1212         }
1213         spin_unlock(&flctx->flc_lock);
1214
1215         INIT_LIST_HEAD(&locks_to_send);
1216
1217         /*
1218          * Allocating count locks is enough because no FL_POSIX locks can be
1219          * added to the list while we are holding cinode->lock_sem that
1220          * protects locking operations of this inode.
1221          */
1222         for (i = 0; i < count; i++) {
1223                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1224                 if (!lck) {
1225                         rc = -ENOMEM;
1226                         goto err_out;
1227                 }
1228                 list_add_tail(&lck->llist, &locks_to_send);
1229         }
1230
1231         el = locks_to_send.next;
1232         spin_lock(&flctx->flc_lock);
1233         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1234                 if (el == &locks_to_send) {
1235                         /*
1236                          * The list ended. We don't have enough allocated
1237                          * structures - something is really wrong.
1238                          */
1239                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1240                         break;
1241                 }
1242                 length = 1 + flock->fl_end - flock->fl_start;
1243                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1244                         type = CIFS_RDLCK;
1245                 else
1246                         type = CIFS_WRLCK;
1247                 lck = list_entry(el, struct lock_to_push, llist);
1248                 lck->pid = hash_lockowner(flock->fl_owner);
1249                 lck->netfid = cfile->fid.netfid;
1250                 lck->length = length;
1251                 lck->type = type;
1252                 lck->offset = flock->fl_start;
1253         }
1254         spin_unlock(&flctx->flc_lock);
1255
1256         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1257                 int stored_rc;
1258
1259                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1260                                              lck->offset, lck->length, NULL,
1261                                              lck->type, 0);
1262                 if (stored_rc)
1263                         rc = stored_rc;
1264                 list_del(&lck->llist);
1265                 kfree(lck);
1266         }
1267
1268 out:
1269         free_xid(xid);
1270         return rc;
1271 err_out:
1272         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1273                 list_del(&lck->llist);
1274                 kfree(lck);
1275         }
1276         goto out;
1277 }
1278
1279 static int
1280 cifs_push_locks(struct cifsFileInfo *cfile)
1281 {
1282         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1283         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1284         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1285         int rc = 0;
1286
1287         /* we are going to update can_cache_brlcks here - need a write access */
1288         down_write(&cinode->lock_sem);
1289         if (!cinode->can_cache_brlcks) {
1290                 up_write(&cinode->lock_sem);
1291                 return rc;
1292         }
1293
1294         if (cap_unix(tcon->ses) &&
1295             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1296             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1297                 rc = cifs_push_posix_locks(cfile);
1298         else
1299                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1300
1301         cinode->can_cache_brlcks = false;
1302         up_write(&cinode->lock_sem);
1303         return rc;
1304 }
1305
1306 static void
1307 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1308                 bool *wait_flag, struct TCP_Server_Info *server)
1309 {
1310         if (flock->fl_flags & FL_POSIX)
1311                 cifs_dbg(FYI, "Posix\n");
1312         if (flock->fl_flags & FL_FLOCK)
1313                 cifs_dbg(FYI, "Flock\n");
1314         if (flock->fl_flags & FL_SLEEP) {
1315                 cifs_dbg(FYI, "Blocking lock\n");
1316                 *wait_flag = true;
1317         }
1318         if (flock->fl_flags & FL_ACCESS)
1319                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1320         if (flock->fl_flags & FL_LEASE)
1321                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1322         if (flock->fl_flags &
1323             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1324                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1325                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1326
1327         *type = server->vals->large_lock_type;
1328         if (flock->fl_type == F_WRLCK) {
1329                 cifs_dbg(FYI, "F_WRLCK\n");
1330                 *type |= server->vals->exclusive_lock_type;
1331                 *lock = 1;
1332         } else if (flock->fl_type == F_UNLCK) {
1333                 cifs_dbg(FYI, "F_UNLCK\n");
1334                 *type |= server->vals->unlock_lock_type;
1335                 *unlock = 1;
1336                 /* Check if unlock includes more than one lock range */
1337         } else if (flock->fl_type == F_RDLCK) {
1338                 cifs_dbg(FYI, "F_RDLCK\n");
1339                 *type |= server->vals->shared_lock_type;
1340                 *lock = 1;
1341         } else if (flock->fl_type == F_EXLCK) {
1342                 cifs_dbg(FYI, "F_EXLCK\n");
1343                 *type |= server->vals->exclusive_lock_type;
1344                 *lock = 1;
1345         } else if (flock->fl_type == F_SHLCK) {
1346                 cifs_dbg(FYI, "F_SHLCK\n");
1347                 *type |= server->vals->shared_lock_type;
1348                 *lock = 1;
1349         } else
1350                 cifs_dbg(FYI, "Unknown type of lock\n");
1351 }
1352
1353 static int
1354 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1355            bool wait_flag, bool posix_lck, unsigned int xid)
1356 {
1357         int rc = 0;
1358         __u64 length = 1 + flock->fl_end - flock->fl_start;
1359         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1360         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1361         struct TCP_Server_Info *server = tcon->ses->server;
1362         __u16 netfid = cfile->fid.netfid;
1363
1364         if (posix_lck) {
1365                 int posix_lock_type;
1366
1367                 rc = cifs_posix_lock_test(file, flock);
1368                 if (!rc)
1369                         return rc;
1370
1371                 if (type & server->vals->shared_lock_type)
1372                         posix_lock_type = CIFS_RDLCK;
1373                 else
1374                         posix_lock_type = CIFS_WRLCK;
1375                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1376                                       hash_lockowner(flock->fl_owner),
1377                                       flock->fl_start, length, flock,
1378                                       posix_lock_type, wait_flag);
1379                 return rc;
1380         }
1381
1382         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1383         if (!rc)
1384                 return rc;
1385
1386         /* BB we could chain these into one lock request BB */
1387         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1388                                     1, 0, false);
1389         if (rc == 0) {
1390                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1391                                             type, 0, 1, false);
1392                 flock->fl_type = F_UNLCK;
1393                 if (rc != 0)
1394                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1395                                  rc);
1396                 return 0;
1397         }
1398
1399         if (type & server->vals->shared_lock_type) {
1400                 flock->fl_type = F_WRLCK;
1401                 return 0;
1402         }
1403
1404         type &= ~server->vals->exclusive_lock_type;
1405
1406         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1407                                     type | server->vals->shared_lock_type,
1408                                     1, 0, false);
1409         if (rc == 0) {
1410                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1411                         type | server->vals->shared_lock_type, 0, 1, false);
1412                 flock->fl_type = F_RDLCK;
1413                 if (rc != 0)
1414                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1415                                  rc);
1416         } else
1417                 flock->fl_type = F_WRLCK;
1418
1419         return 0;
1420 }
1421
1422 void
1423 cifs_move_llist(struct list_head *source, struct list_head *dest)
1424 {
1425         struct list_head *li, *tmp;
1426         list_for_each_safe(li, tmp, source)
1427                 list_move(li, dest);
1428 }
1429
1430 void
1431 cifs_free_llist(struct list_head *llist)
1432 {
1433         struct cifsLockInfo *li, *tmp;
1434         list_for_each_entry_safe(li, tmp, llist, llist) {
1435                 cifs_del_lock_waiters(li);
1436                 list_del(&li->llist);
1437                 kfree(li);
1438         }
1439 }
1440
1441 int
1442 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1443                   unsigned int xid)
1444 {
1445         int rc = 0, stored_rc;
1446         static const int types[] = {
1447                 LOCKING_ANDX_LARGE_FILES,
1448                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1449         };
1450         unsigned int i;
1451         unsigned int max_num, num, max_buf;
1452         LOCKING_ANDX_RANGE *buf, *cur;
1453         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1454         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1455         struct cifsLockInfo *li, *tmp;
1456         __u64 length = 1 + flock->fl_end - flock->fl_start;
1457         struct list_head tmp_llist;
1458
1459         INIT_LIST_HEAD(&tmp_llist);
1460
1461         /*
1462          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1463          * and check it for zero before using.
1464          */
1465         max_buf = tcon->ses->server->maxBuf;
1466         if (!max_buf)
1467                 return -EINVAL;
1468
1469         max_num = (max_buf - sizeof(struct smb_hdr)) /
1470                                                 sizeof(LOCKING_ANDX_RANGE);
1471         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1472         if (!buf)
1473                 return -ENOMEM;
1474
1475         down_write(&cinode->lock_sem);
1476         for (i = 0; i < 2; i++) {
1477                 cur = buf;
1478                 num = 0;
1479                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1480                         if (flock->fl_start > li->offset ||
1481                             (flock->fl_start + length) <
1482                             (li->offset + li->length))
1483                                 continue;
1484                         if (current->tgid != li->pid)
1485                                 continue;
1486                         if (types[i] != li->type)
1487                                 continue;
1488                         if (cinode->can_cache_brlcks) {
1489                                 /*
1490                                  * We can cache brlock requests - simply remove
1491                                  * a lock from the file's list.
1492                                  */
1493                                 list_del(&li->llist);
1494                                 cifs_del_lock_waiters(li);
1495                                 kfree(li);
1496                                 continue;
1497                         }
1498                         cur->Pid = cpu_to_le16(li->pid);
1499                         cur->LengthLow = cpu_to_le32((u32)li->length);
1500                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1501                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1502                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1503                         /*
1504                          * We need to save a lock here to let us add it again to
1505                          * the file's list if the unlock range request fails on
1506                          * the server.
1507                          */
1508                         list_move(&li->llist, &tmp_llist);
1509                         if (++num == max_num) {
1510                                 stored_rc = cifs_lockv(xid, tcon,
1511                                                        cfile->fid.netfid,
1512                                                        li->type, num, 0, buf);
1513                                 if (stored_rc) {
1514                                         /*
1515                                          * We failed on the unlock range
1516                                          * request - add all locks from the tmp
1517                                          * list to the head of the file's list.
1518                                          */
1519                                         cifs_move_llist(&tmp_llist,
1520                                                         &cfile->llist->locks);
1521                                         rc = stored_rc;
1522                                 } else
1523                                         /*
1524                                          * The unlock range request succeed -
1525                                          * free the tmp list.
1526                                          */
1527                                         cifs_free_llist(&tmp_llist);
1528                                 cur = buf;
1529                                 num = 0;
1530                         } else
1531                                 cur++;
1532                 }
1533                 if (num) {
1534                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1535                                                types[i], num, 0, buf);
1536                         if (stored_rc) {
1537                                 cifs_move_llist(&tmp_llist,
1538                                                 &cfile->llist->locks);
1539                                 rc = stored_rc;
1540                         } else
1541                                 cifs_free_llist(&tmp_llist);
1542                 }
1543         }
1544
1545         up_write(&cinode->lock_sem);
1546         kfree(buf);
1547         return rc;
1548 }
1549
1550 static int
1551 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1552            bool wait_flag, bool posix_lck, int lock, int unlock,
1553            unsigned int xid)
1554 {
1555         int rc = 0;
1556         __u64 length = 1 + flock->fl_end - flock->fl_start;
1557         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1558         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1559         struct TCP_Server_Info *server = tcon->ses->server;
1560         struct inode *inode = d_inode(cfile->dentry);
1561
1562         if (posix_lck) {
1563                 int posix_lock_type;
1564
1565                 rc = cifs_posix_lock_set(file, flock);
1566                 if (!rc || rc < 0)
1567                         return rc;
1568
1569                 if (type & server->vals->shared_lock_type)
1570                         posix_lock_type = CIFS_RDLCK;
1571                 else
1572                         posix_lock_type = CIFS_WRLCK;
1573
1574                 if (unlock == 1)
1575                         posix_lock_type = CIFS_UNLCK;
1576
1577                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1578                                       hash_lockowner(flock->fl_owner),
1579                                       flock->fl_start, length,
1580                                       NULL, posix_lock_type, wait_flag);
1581                 goto out;
1582         }
1583
1584         if (lock) {
1585                 struct cifsLockInfo *lock;
1586
1587                 lock = cifs_lock_init(flock->fl_start, length, type);
1588                 if (!lock)
1589                         return -ENOMEM;
1590
1591                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1592                 if (rc < 0) {
1593                         kfree(lock);
1594                         return rc;
1595                 }
1596                 if (!rc)
1597                         goto out;
1598
1599                 /*
1600                  * Windows 7 server can delay breaking lease from read to None
1601                  * if we set a byte-range lock on a file - break it explicitly
1602                  * before sending the lock to the server to be sure the next
1603                  * read won't conflict with non-overlapted locks due to
1604                  * pagereading.
1605                  */
1606                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1607                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1608                         cifs_zap_mapping(inode);
1609                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1610                                  inode);
1611                         CIFS_I(inode)->oplock = 0;
1612                 }
1613
1614                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1615                                             type, 1, 0, wait_flag);
1616                 if (rc) {
1617                         kfree(lock);
1618                         return rc;
1619                 }
1620
1621                 cifs_lock_add(cfile, lock);
1622         } else if (unlock)
1623                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1624
1625 out:
1626         if (flock->fl_flags & FL_POSIX && !rc)
1627                 rc = locks_lock_file_wait(file, flock);
1628         return rc;
1629 }
1630
1631 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1632 {
1633         int rc, xid;
1634         int lock = 0, unlock = 0;
1635         bool wait_flag = false;
1636         bool posix_lck = false;
1637         struct cifs_sb_info *cifs_sb;
1638         struct cifs_tcon *tcon;
1639         struct cifsInodeInfo *cinode;
1640         struct cifsFileInfo *cfile;
1641         __u16 netfid;
1642         __u32 type;
1643
1644         rc = -EACCES;
1645         xid = get_xid();
1646
1647         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1648                  cmd, flock->fl_flags, flock->fl_type,
1649                  flock->fl_start, flock->fl_end);
1650
1651         cfile = (struct cifsFileInfo *)file->private_data;
1652         tcon = tlink_tcon(cfile->tlink);
1653
1654         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1655                         tcon->ses->server);
1656
1657         cifs_sb = CIFS_FILE_SB(file);
1658         netfid = cfile->fid.netfid;
1659         cinode = CIFS_I(file_inode(file));
1660
1661         if (cap_unix(tcon->ses) &&
1662             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1663             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1664                 posix_lck = true;
1665         /*
1666          * BB add code here to normalize offset and length to account for
1667          * negative length which we can not accept over the wire.
1668          */
1669         if (IS_GETLK(cmd)) {
1670                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1671                 free_xid(xid);
1672                 return rc;
1673         }
1674
1675         if (!lock && !unlock) {
1676                 /*
1677                  * if no lock or unlock then nothing to do since we do not
1678                  * know what it is
1679                  */
1680                 free_xid(xid);
1681                 return -EOPNOTSUPP;
1682         }
1683
1684         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1685                         xid);
1686         free_xid(xid);
1687         return rc;
1688 }
1689
1690 /*
1691  * update the file size (if needed) after a write. Should be called with
1692  * the inode->i_lock held
1693  */
1694 void
1695 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1696                       unsigned int bytes_written)
1697 {
1698         loff_t end_of_write = offset + bytes_written;
1699
1700         if (end_of_write > cifsi->server_eof)
1701                 cifsi->server_eof = end_of_write;
1702 }
1703
1704 static ssize_t
1705 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1706            size_t write_size, loff_t *offset)
1707 {
1708         int rc = 0;
1709         unsigned int bytes_written = 0;
1710         unsigned int total_written;
1711         struct cifs_sb_info *cifs_sb;
1712         struct cifs_tcon *tcon;
1713         struct TCP_Server_Info *server;
1714         unsigned int xid;
1715         struct dentry *dentry = open_file->dentry;
1716         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1717         struct cifs_io_parms io_parms;
1718
1719         cifs_sb = CIFS_SB(dentry->d_sb);
1720
1721         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1722                  write_size, *offset, dentry);
1723
1724         tcon = tlink_tcon(open_file->tlink);
1725         server = tcon->ses->server;
1726
1727         if (!server->ops->sync_write)
1728                 return -ENOSYS;
1729
1730         xid = get_xid();
1731
1732         for (total_written = 0; write_size > total_written;
1733              total_written += bytes_written) {
1734                 rc = -EAGAIN;
1735                 while (rc == -EAGAIN) {
1736                         struct kvec iov[2];
1737                         unsigned int len;
1738
1739                         if (open_file->invalidHandle) {
1740                                 /* we could deadlock if we called
1741                                    filemap_fdatawait from here so tell
1742                                    reopen_file not to flush data to
1743                                    server now */
1744                                 rc = cifs_reopen_file(open_file, false);
1745                                 if (rc != 0)
1746                                         break;
1747                         }
1748
1749                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1750                                   (unsigned int)write_size - total_written);
1751                         /* iov[0] is reserved for smb header */
1752                         iov[1].iov_base = (char *)write_data + total_written;
1753                         iov[1].iov_len = len;
1754                         io_parms.pid = pid;
1755                         io_parms.tcon = tcon;
1756                         io_parms.offset = *offset;
1757                         io_parms.length = len;
1758                         rc = server->ops->sync_write(xid, &open_file->fid,
1759                                         &io_parms, &bytes_written, iov, 1);
1760                 }
1761                 if (rc || (bytes_written == 0)) {
1762                         if (total_written)
1763                                 break;
1764                         else {
1765                                 free_xid(xid);
1766                                 return rc;
1767                         }
1768                 } else {
1769                         spin_lock(&d_inode(dentry)->i_lock);
1770                         cifs_update_eof(cifsi, *offset, bytes_written);
1771                         spin_unlock(&d_inode(dentry)->i_lock);
1772                         *offset += bytes_written;
1773                 }
1774         }
1775
1776         cifs_stats_bytes_written(tcon, total_written);
1777
1778         if (total_written > 0) {
1779                 spin_lock(&d_inode(dentry)->i_lock);
1780                 if (*offset > d_inode(dentry)->i_size)
1781                         i_size_write(d_inode(dentry), *offset);
1782                 spin_unlock(&d_inode(dentry)->i_lock);
1783         }
1784         mark_inode_dirty_sync(d_inode(dentry));
1785         free_xid(xid);
1786         return total_written;
1787 }
1788
1789 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1790                                         bool fsuid_only)
1791 {
1792         struct cifsFileInfo *open_file = NULL;
1793         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1794         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1795
1796         /* only filter by fsuid on multiuser mounts */
1797         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1798                 fsuid_only = false;
1799
1800         spin_lock(&tcon->open_file_lock);
1801         /* we could simply get the first_list_entry since write-only entries
1802            are always at the end of the list but since the first entry might
1803            have a close pending, we go through the whole list */
1804         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1805                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1806                         continue;
1807                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1808                         if (!open_file->invalidHandle) {
1809                                 /* found a good file */
1810                                 /* lock it so it will not be closed on us */
1811                                 cifsFileInfo_get(open_file);
1812                                 spin_unlock(&tcon->open_file_lock);
1813                                 return open_file;
1814                         } /* else might as well continue, and look for
1815                              another, or simply have the caller reopen it
1816                              again rather than trying to fix this handle */
1817                 } else /* write only file */
1818                         break; /* write only files are last so must be done */
1819         }
1820         spin_unlock(&tcon->open_file_lock);
1821         return NULL;
1822 }
1823
1824 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1825                                         bool fsuid_only)
1826 {
1827         struct cifsFileInfo *open_file, *inv_file = NULL;
1828         struct cifs_sb_info *cifs_sb;
1829         struct cifs_tcon *tcon;
1830         bool any_available = false;
1831         int rc;
1832         unsigned int refind = 0;
1833
1834         /* Having a null inode here (because mapping->host was set to zero by
1835         the VFS or MM) should not happen but we had reports of on oops (due to
1836         it being zero) during stress testcases so we need to check for it */
1837
1838         if (cifs_inode == NULL) {
1839                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1840                 dump_stack();
1841                 return NULL;
1842         }
1843
1844         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1845         tcon = cifs_sb_master_tcon(cifs_sb);
1846
1847         /* only filter by fsuid on multiuser mounts */
1848         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1849                 fsuid_only = false;
1850
1851         spin_lock(&tcon->open_file_lock);
1852 refind_writable:
1853         if (refind > MAX_REOPEN_ATT) {
1854                 spin_unlock(&tcon->open_file_lock);
1855                 return NULL;
1856         }
1857         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1858                 if (!any_available && open_file->pid != current->tgid)
1859                         continue;
1860                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1861                         continue;
1862                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1863                         if (!open_file->invalidHandle) {
1864                                 /* found a good writable file */
1865                                 cifsFileInfo_get(open_file);
1866                                 spin_unlock(&tcon->open_file_lock);
1867                                 return open_file;
1868                         } else {
1869                                 if (!inv_file)
1870                                         inv_file = open_file;
1871                         }
1872                 }
1873         }
1874         /* couldn't find useable FH with same pid, try any available */
1875         if (!any_available) {
1876                 any_available = true;
1877                 goto refind_writable;
1878         }
1879
1880         if (inv_file) {
1881                 any_available = false;
1882                 cifsFileInfo_get(inv_file);
1883         }
1884
1885         spin_unlock(&tcon->open_file_lock);
1886
1887         if (inv_file) {
1888                 rc = cifs_reopen_file(inv_file, false);
1889                 if (!rc)
1890                         return inv_file;
1891                 else {
1892                         spin_lock(&tcon->open_file_lock);
1893                         list_move_tail(&inv_file->flist,
1894                                         &cifs_inode->openFileList);
1895                         spin_unlock(&tcon->open_file_lock);
1896                         cifsFileInfo_put(inv_file);
1897                         ++refind;
1898                         inv_file = NULL;
1899                         spin_lock(&tcon->open_file_lock);
1900                         goto refind_writable;
1901                 }
1902         }
1903
1904         return NULL;
1905 }
1906
1907 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1908 {
1909         struct address_space *mapping = page->mapping;
1910         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1911         char *write_data;
1912         int rc = -EFAULT;
1913         int bytes_written = 0;
1914         struct inode *inode;
1915         struct cifsFileInfo *open_file;
1916
1917         if (!mapping || !mapping->host)
1918                 return -EFAULT;
1919
1920         inode = page->mapping->host;
1921
1922         offset += (loff_t)from;
1923         write_data = kmap(page);
1924         write_data += from;
1925
1926         if ((to > PAGE_SIZE) || (from > to)) {
1927                 kunmap(page);
1928                 return -EIO;
1929         }
1930
1931         /* racing with truncate? */
1932         if (offset > mapping->host->i_size) {
1933                 kunmap(page);
1934                 return 0; /* don't care */
1935         }
1936
1937         /* check to make sure that we are not extending the file */
1938         if (mapping->host->i_size - offset < (loff_t)to)
1939                 to = (unsigned)(mapping->host->i_size - offset);
1940
1941         open_file = find_writable_file(CIFS_I(mapping->host), false);
1942         if (open_file) {
1943                 bytes_written = cifs_write(open_file, open_file->pid,
1944                                            write_data, to - from, &offset);
1945                 cifsFileInfo_put(open_file);
1946                 /* Does mm or vfs already set times? */
1947                 inode->i_atime = inode->i_mtime = current_time(inode);
1948                 if ((bytes_written > 0) && (offset))
1949                         rc = 0;
1950                 else if (bytes_written < 0)
1951                         rc = bytes_written;
1952         } else {
1953                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1954                 rc = -EIO;
1955         }
1956
1957         kunmap(page);
1958         return rc;
1959 }
1960
1961 static struct cifs_writedata *
1962 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1963                           pgoff_t end, pgoff_t *index,
1964                           unsigned int *found_pages)
1965 {
1966         unsigned int nr_pages;
1967         struct page **pages;
1968         struct cifs_writedata *wdata;
1969
1970         wdata = cifs_writedata_alloc((unsigned int)tofind,
1971                                      cifs_writev_complete);
1972         if (!wdata)
1973                 return NULL;
1974
1975         /*
1976          * find_get_pages_tag seems to return a max of 256 on each
1977          * iteration, so we must call it several times in order to
1978          * fill the array or the wsize is effectively limited to
1979          * 256 * PAGE_SIZE.
1980          */
1981         *found_pages = 0;
1982         pages = wdata->pages;
1983         do {
1984                 nr_pages = find_get_pages_tag(mapping, index,
1985                                               PAGECACHE_TAG_DIRTY, tofind,
1986                                               pages);
1987                 *found_pages += nr_pages;
1988                 tofind -= nr_pages;
1989                 pages += nr_pages;
1990         } while (nr_pages && tofind && *index <= end);
1991
1992         return wdata;
1993 }
1994
1995 static unsigned int
1996 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1997                     struct address_space *mapping,
1998                     struct writeback_control *wbc,
1999                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2000 {
2001         unsigned int nr_pages = 0, i;
2002         struct page *page;
2003
2004         for (i = 0; i < found_pages; i++) {
2005                 page = wdata->pages[i];
2006                 /*
2007                  * At this point we hold neither mapping->tree_lock nor
2008                  * lock on the page itself: the page may be truncated or
2009                  * invalidated (changing page->mapping to NULL), or even
2010                  * swizzled back from swapper_space to tmpfs file
2011                  * mapping
2012                  */
2013
2014                 if (nr_pages == 0)
2015                         lock_page(page);
2016                 else if (!trylock_page(page))
2017                         break;
2018
2019                 if (unlikely(page->mapping != mapping)) {
2020                         unlock_page(page);
2021                         break;
2022                 }
2023
2024                 if (!wbc->range_cyclic && page->index > end) {
2025                         *done = true;
2026                         unlock_page(page);
2027                         break;
2028                 }
2029
2030                 if (*next && (page->index != *next)) {
2031                         /* Not next consecutive page */
2032                         unlock_page(page);
2033                         break;
2034                 }
2035
2036                 if (wbc->sync_mode != WB_SYNC_NONE)
2037                         wait_on_page_writeback(page);
2038
2039                 if (PageWriteback(page) ||
2040                                 !clear_page_dirty_for_io(page)) {
2041                         unlock_page(page);
2042                         break;
2043                 }
2044
2045                 /*
2046                  * This actually clears the dirty bit in the radix tree.
2047                  * See cifs_writepage() for more commentary.
2048                  */
2049                 set_page_writeback(page);
2050                 if (page_offset(page) >= i_size_read(mapping->host)) {
2051                         *done = true;
2052                         unlock_page(page);
2053                         end_page_writeback(page);
2054                         break;
2055                 }
2056
2057                 wdata->pages[i] = page;
2058                 *next = page->index + 1;
2059                 ++nr_pages;
2060         }
2061
2062         /* reset index to refind any pages skipped */
2063         if (nr_pages == 0)
2064                 *index = wdata->pages[0]->index + 1;
2065
2066         /* put any pages we aren't going to use */
2067         for (i = nr_pages; i < found_pages; i++) {
2068                 put_page(wdata->pages[i]);
2069                 wdata->pages[i] = NULL;
2070         }
2071
2072         return nr_pages;
2073 }
2074
2075 static int
2076 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2077                  struct address_space *mapping, struct writeback_control *wbc)
2078 {
2079         int rc = 0;
2080         struct TCP_Server_Info *server;
2081         unsigned int i;
2082
2083         wdata->sync_mode = wbc->sync_mode;
2084         wdata->nr_pages = nr_pages;
2085         wdata->offset = page_offset(wdata->pages[0]);
2086         wdata->pagesz = PAGE_SIZE;
2087         wdata->tailsz = min(i_size_read(mapping->host) -
2088                         page_offset(wdata->pages[nr_pages - 1]),
2089                         (loff_t)PAGE_SIZE);
2090         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2091
2092         if (wdata->cfile != NULL)
2093                 cifsFileInfo_put(wdata->cfile);
2094         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2095         if (!wdata->cfile) {
2096                 cifs_dbg(VFS, "No writable handles for inode\n");
2097                 rc = -EBADF;
2098         } else {
2099                 wdata->pid = wdata->cfile->pid;
2100                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2101                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2102         }
2103
2104         for (i = 0; i < nr_pages; ++i)
2105                 unlock_page(wdata->pages[i]);
2106
2107         return rc;
2108 }
2109
2110 static int cifs_writepages(struct address_space *mapping,
2111                            struct writeback_control *wbc)
2112 {
2113         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2114         struct TCP_Server_Info *server;
2115         bool done = false, scanned = false, range_whole = false;
2116         pgoff_t end, index;
2117         struct cifs_writedata *wdata;
2118         int rc = 0;
2119
2120         /*
2121          * If wsize is smaller than the page cache size, default to writing
2122          * one page at a time via cifs_writepage
2123          */
2124         if (cifs_sb->wsize < PAGE_SIZE)
2125                 return generic_writepages(mapping, wbc);
2126
2127         if (wbc->range_cyclic) {
2128                 index = mapping->writeback_index; /* Start from prev offset */
2129                 end = -1;
2130         } else {
2131                 index = wbc->range_start >> PAGE_SHIFT;
2132                 end = wbc->range_end >> PAGE_SHIFT;
2133                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2134                         range_whole = true;
2135                 scanned = true;
2136         }
2137         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2138 retry:
2139         while (!done && index <= end) {
2140                 unsigned int i, nr_pages, found_pages, wsize, credits;
2141                 pgoff_t next = 0, tofind, saved_index = index;
2142
2143                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2144                                                    &wsize, &credits);
2145                 if (rc)
2146                         break;
2147
2148                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2149
2150                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2151                                                   &found_pages);
2152                 if (!wdata) {
2153                         rc = -ENOMEM;
2154                         add_credits_and_wake_if(server, credits, 0);
2155                         break;
2156                 }
2157
2158                 if (found_pages == 0) {
2159                         kref_put(&wdata->refcount, cifs_writedata_release);
2160                         add_credits_and_wake_if(server, credits, 0);
2161                         break;
2162                 }
2163
2164                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2165                                                end, &index, &next, &done);
2166
2167                 /* nothing to write? */
2168                 if (nr_pages == 0) {
2169                         kref_put(&wdata->refcount, cifs_writedata_release);
2170                         add_credits_and_wake_if(server, credits, 0);
2171                         continue;
2172                 }
2173
2174                 wdata->credits = credits;
2175
2176                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2177
2178                 /* send failure -- clean up the mess */
2179                 if (rc != 0) {
2180                         add_credits_and_wake_if(server, wdata->credits, 0);
2181                         for (i = 0; i < nr_pages; ++i) {
2182                                 if (rc == -EAGAIN)
2183                                         redirty_page_for_writepage(wbc,
2184                                                            wdata->pages[i]);
2185                                 else
2186                                         SetPageError(wdata->pages[i]);
2187                                 end_page_writeback(wdata->pages[i]);
2188                                 put_page(wdata->pages[i]);
2189                         }
2190                         if (rc != -EAGAIN)
2191                                 mapping_set_error(mapping, rc);
2192                 }
2193                 kref_put(&wdata->refcount, cifs_writedata_release);
2194
2195                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2196                         index = saved_index;
2197                         continue;
2198                 }
2199
2200                 wbc->nr_to_write -= nr_pages;
2201                 if (wbc->nr_to_write <= 0)
2202                         done = true;
2203
2204                 index = next;
2205         }
2206
2207         if (!scanned && !done) {
2208                 /*
2209                  * We hit the last page and there is more work to be done: wrap
2210                  * back to the start of the file
2211                  */
2212                 scanned = true;
2213                 index = 0;
2214                 goto retry;
2215         }
2216
2217         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2218                 mapping->writeback_index = index;
2219
2220         return rc;
2221 }
2222
2223 static int
2224 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2225 {
2226         int rc;
2227         unsigned int xid;
2228
2229         xid = get_xid();
2230 /* BB add check for wbc flags */
2231         get_page(page);
2232         if (!PageUptodate(page))
2233                 cifs_dbg(FYI, "ppw - page not up to date\n");
2234
2235         /*
2236          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2237          *
2238          * A writepage() implementation always needs to do either this,
2239          * or re-dirty the page with "redirty_page_for_writepage()" in
2240          * the case of a failure.
2241          *
2242          * Just unlocking the page will cause the radix tree tag-bits
2243          * to fail to update with the state of the page correctly.
2244          */
2245         set_page_writeback(page);
2246 retry_write:
2247         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2248         if (rc == -EAGAIN) {
2249                 if (wbc->sync_mode == WB_SYNC_ALL)
2250                         goto retry_write;
2251                 redirty_page_for_writepage(wbc, page);
2252         } else if (rc != 0) {
2253                 SetPageError(page);
2254                 mapping_set_error(page->mapping, rc);
2255         } else {
2256                 SetPageUptodate(page);
2257         }
2258         end_page_writeback(page);
2259         put_page(page);
2260         free_xid(xid);
2261         return rc;
2262 }
2263
/*
 * Write @page with cifs_writepage_locked() and unlock it afterwards,
 * passing the write result back to the caller.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2270
2271 static int cifs_write_end(struct file *file, struct address_space *mapping,
2272                         loff_t pos, unsigned len, unsigned copied,
2273                         struct page *page, void *fsdata)
2274 {
2275         int rc;
2276         struct inode *inode = mapping->host;
2277         struct cifsFileInfo *cfile = file->private_data;
2278         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2279         __u32 pid;
2280
2281         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2282                 pid = cfile->pid;
2283         else
2284                 pid = current->tgid;
2285
2286         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2287                  page, pos, copied);
2288
2289         if (PageChecked(page)) {
2290                 if (copied == len)
2291                         SetPageUptodate(page);
2292                 ClearPageChecked(page);
2293         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2294                 SetPageUptodate(page);
2295
2296         if (!PageUptodate(page)) {
2297                 char *page_data;
2298                 unsigned offset = pos & (PAGE_SIZE - 1);
2299                 unsigned int xid;
2300
2301                 xid = get_xid();
2302                 /* this is probably better than directly calling
2303                    partialpage_write since in this function the file handle is
2304                    known which we might as well leverage */
2305                 /* BB check if anything else missing out of ppw
2306                    such as updating last write time */
2307                 page_data = kmap(page);
2308                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2309                 /* if (rc < 0) should we set writebehind rc? */
2310                 kunmap(page);
2311
2312                 free_xid(xid);
2313         } else {
2314                 rc = copied;
2315                 pos += copied;
2316                 set_page_dirty(page);
2317         }
2318
2319         if (rc > 0) {
2320                 spin_lock(&inode->i_lock);
2321                 if (pos > inode->i_size)
2322                         i_size_write(inode, pos);
2323                 spin_unlock(&inode->i_lock);
2324         }
2325
2326         unlock_page(page);
2327         put_page(page);
2328
2329         return rc;
2330 }
2331
2332 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2333                       int datasync)
2334 {
2335         unsigned int xid;
2336         int rc = 0;
2337         struct cifs_tcon *tcon;
2338         struct TCP_Server_Info *server;
2339         struct cifsFileInfo *smbfile = file->private_data;
2340         struct inode *inode = file_inode(file);
2341         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2342
2343         rc = file_write_and_wait_range(file, start, end);
2344         if (rc)
2345                 return rc;
2346         inode_lock(inode);
2347
2348         xid = get_xid();
2349
2350         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2351                  file, datasync);
2352
2353         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2354                 rc = cifs_zap_mapping(inode);
2355                 if (rc) {
2356                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2357                         rc = 0; /* don't care about it in fsync */
2358                 }
2359         }
2360
2361         tcon = tlink_tcon(smbfile->tlink);
2362         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2363                 server = tcon->ses->server;
2364                 if (server->ops->flush)
2365                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2366                 else
2367                         rc = -ENOSYS;
2368         }
2369
2370         free_xid(xid);
2371         inode_unlock(inode);
2372         return rc;
2373 }
2374
2375 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2376 {
2377         unsigned int xid;
2378         int rc = 0;
2379         struct cifs_tcon *tcon;
2380         struct TCP_Server_Info *server;
2381         struct cifsFileInfo *smbfile = file->private_data;
2382         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2383         struct inode *inode = file->f_mapping->host;
2384
2385         rc = file_write_and_wait_range(file, start, end);
2386         if (rc)
2387                 return rc;
2388         inode_lock(inode);
2389
2390         xid = get_xid();
2391
2392         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2393                  file, datasync);
2394
2395         tcon = tlink_tcon(smbfile->tlink);
2396         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2397                 server = tcon->ses->server;
2398                 if (server->ops->flush)
2399                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2400                 else
2401                         rc = -ENOSYS;
2402         }
2403
2404         free_xid(xid);
2405         inode_unlock(inode);
2406         return rc;
2407 }
2408
2409 /*
2410  * As file closes, flush all cached write data for this inode checking
2411  * for write behind errors.
2412  */
2413 int cifs_flush(struct file *file, fl_owner_t id)
2414 {
2415         struct inode *inode = file_inode(file);
2416         int rc = 0;
2417
2418         if (file->f_mode & FMODE_WRITE)
2419                 rc = filemap_write_and_wait(inode->i_mapping);
2420
2421         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2422
2423         return rc;
2424 }
2425
2426 static int
2427 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2428 {
2429         int rc = 0;
2430         unsigned long i;
2431
2432         for (i = 0; i < num_pages; i++) {
2433                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2434                 if (!pages[i]) {
2435                         /*
2436                          * save number of pages we have already allocated and
2437                          * return with ENOMEM error
2438                          */
2439                         num_pages = i;
2440                         rc = -ENOMEM;
2441                         break;
2442                 }
2443         }
2444
2445         if (rc) {
2446                 for (i = 0; i < num_pages; i++)
2447                         put_page(pages[i]);
2448         }
2449         return rc;
2450 }
2451
2452 static inline
2453 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2454 {
2455         size_t num_pages;
2456         size_t clen;
2457
2458         clen = min_t(const size_t, len, wsize);
2459         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2460
2461         if (cur_len)
2462                 *cur_len = clen;
2463
2464         return num_pages;
2465 }
2466
2467 static void
2468 cifs_uncached_writedata_release(struct kref *refcount)
2469 {
2470         int i;
2471         struct cifs_writedata *wdata = container_of(refcount,
2472                                         struct cifs_writedata, refcount);
2473
2474         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2475         for (i = 0; i < wdata->nr_pages; i++)
2476                 put_page(wdata->pages[i]);
2477         cifs_writedata_release(refcount);
2478 }
2479
2480 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2481
2482 static void
2483 cifs_uncached_writev_complete(struct work_struct *work)
2484 {
2485         struct cifs_writedata *wdata = container_of(work,
2486                                         struct cifs_writedata, work);
2487         struct inode *inode = d_inode(wdata->cfile->dentry);
2488         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2489
2490         spin_lock(&inode->i_lock);
2491         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2492         if (cifsi->server_eof > inode->i_size)
2493                 i_size_write(inode, cifsi->server_eof);
2494         spin_unlock(&inode->i_lock);
2495
2496         complete(&wdata->done);
2497         collect_uncached_write_data(wdata->ctx);
2498         /* the below call can possibly free the last ref to aio ctx */
2499         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2500 }
2501
2502 static int
2503 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2504                       size_t *len, unsigned long *num_pages)
2505 {
2506         size_t save_len, copied, bytes, cur_len = *len;
2507         unsigned long i, nr_pages = *num_pages;
2508
2509         save_len = cur_len;
2510         for (i = 0; i < nr_pages; i++) {
2511                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2512                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2513                 cur_len -= copied;
2514                 /*
2515                  * If we didn't copy as much as we expected, then that
2516                  * may mean we trod into an unmapped area. Stop copying
2517                  * at that point. On the next pass through the big
2518                  * loop, we'll likely end up getting a zero-length
2519                  * write and bailing out of it.
2520                  */
2521                 if (copied < bytes)
2522                         break;
2523         }
2524         cur_len = save_len - cur_len;
2525         *len = cur_len;
2526
2527         /*
2528          * If we have no data to send, then that probably means that
2529          * the copy above failed altogether. That's most likely because
2530          * the address in the iovec was bogus. Return -EFAULT and let
2531          * the caller free anything we allocated and bail out.
2532          */
2533         if (!cur_len)
2534                 return -EFAULT;
2535
2536         /*
2537          * i + 1 now represents the number of pages we actually used in
2538          * the copy phase above.
2539          */
2540         *num_pages = i + 1;
2541         return 0;
2542 }
2543
2544 static int
2545 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2546                      struct cifsFileInfo *open_file,
2547                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2548                      struct cifs_aio_ctx *ctx)
2549 {
2550         int rc = 0;
2551         size_t cur_len;
2552         unsigned long nr_pages, num_pages, i;
2553         struct cifs_writedata *wdata;
2554         struct iov_iter saved_from = *from;
2555         loff_t saved_offset = offset;
2556         pid_t pid;
2557         struct TCP_Server_Info *server;
2558
2559         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2560                 pid = open_file->pid;
2561         else
2562                 pid = current->tgid;
2563
2564         server = tlink_tcon(open_file->tlink)->ses->server;
2565
2566         do {
2567                 unsigned int wsize, credits;
2568
2569                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2570                                                    &wsize, &credits);
2571                 if (rc)
2572                         break;
2573
2574                 nr_pages = get_numpages(wsize, len, &cur_len);
2575                 wdata = cifs_writedata_alloc(nr_pages,
2576                                              cifs_uncached_writev_complete);
2577                 if (!wdata) {
2578                         rc = -ENOMEM;
2579                         add_credits_and_wake_if(server, credits, 0);
2580                         break;
2581                 }
2582
2583                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2584                 if (rc) {
2585                         kfree(wdata);
2586                         add_credits_and_wake_if(server, credits, 0);
2587                         break;
2588                 }
2589
2590                 num_pages = nr_pages;
2591                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2592                 if (rc) {
2593                         for (i = 0; i < nr_pages; i++)
2594                                 put_page(wdata->pages[i]);
2595                         kfree(wdata);
2596                         add_credits_and_wake_if(server, credits, 0);
2597                         break;
2598                 }
2599
2600                 /*
2601                  * Bring nr_pages down to the number of pages we actually used,
2602                  * and free any pages that we didn't use.
2603                  */
2604                 for ( ; nr_pages > num_pages; nr_pages--)
2605                         put_page(wdata->pages[nr_pages - 1]);
2606
2607                 wdata->sync_mode = WB_SYNC_ALL;
2608                 wdata->nr_pages = nr_pages;
2609                 wdata->offset = (__u64)offset;
2610                 wdata->cfile = cifsFileInfo_get(open_file);
2611                 wdata->pid = pid;
2612                 wdata->bytes = cur_len;
2613                 wdata->pagesz = PAGE_SIZE;
2614                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2615                 wdata->credits = credits;
2616                 wdata->ctx = ctx;
2617                 kref_get(&ctx->refcount);
2618
2619                 if (!wdata->cfile->invalidHandle ||
2620                     !(rc = cifs_reopen_file(wdata->cfile, false)))
2621                         rc = server->ops->async_writev(wdata,
2622                                         cifs_uncached_writedata_release);
2623                 if (rc) {
2624                         add_credits_and_wake_if(server, wdata->credits, 0);
2625                         kref_put(&wdata->refcount,
2626                                  cifs_uncached_writedata_release);
2627                         if (rc == -EAGAIN) {
2628                                 *from = saved_from;
2629                                 iov_iter_advance(from, offset - saved_offset);
2630                                 continue;
2631                         }
2632                         break;
2633                 }
2634
2635                 list_add_tail(&wdata->list, wdata_list);
2636                 offset += cur_len;
2637                 len -= cur_len;
2638         } while (len > 0);
2639
2640         return rc;
2641 }
2642
2643 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2644 {
2645         struct cifs_writedata *wdata, *tmp;
2646         struct cifs_tcon *tcon;
2647         struct cifs_sb_info *cifs_sb;
2648         struct dentry *dentry = ctx->cfile->dentry;
2649         unsigned int i;
2650         int rc;
2651
2652         tcon = tlink_tcon(ctx->cfile->tlink);
2653         cifs_sb = CIFS_SB(dentry->d_sb);
2654
2655         mutex_lock(&ctx->aio_mutex);
2656
2657         if (list_empty(&ctx->list)) {
2658                 mutex_unlock(&ctx->aio_mutex);
2659                 return;
2660         }
2661
2662         rc = ctx->rc;
2663         /*
2664          * Wait for and collect replies for any successful sends in order of
2665          * increasing offset. Once an error is hit, then return without waiting
2666          * for any more replies.
2667          */
2668 restart_loop:
2669         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2670                 if (!rc) {
2671                         if (!try_wait_for_completion(&wdata->done)) {
2672                                 mutex_unlock(&ctx->aio_mutex);
2673                                 return;
2674                         }
2675
2676                         if (wdata->result)
2677                                 rc = wdata->result;
2678                         else
2679                                 ctx->total_len += wdata->bytes;
2680
2681                         /* resend call if it's a retryable error */
2682                         if (rc == -EAGAIN) {
2683                                 struct list_head tmp_list;
2684                                 struct iov_iter tmp_from = ctx->iter;
2685
2686                                 INIT_LIST_HEAD(&tmp_list);
2687                                 list_del_init(&wdata->list);
2688
2689                                 iov_iter_advance(&tmp_from,
2690                                                  wdata->offset - ctx->pos);
2691
2692                                 rc = cifs_write_from_iter(wdata->offset,
2693                                                 wdata->bytes, &tmp_from,
2694                                                 ctx->cfile, cifs_sb, &tmp_list,
2695                                                 ctx);
2696
2697                                 list_splice(&tmp_list, &ctx->list);
2698
2699                                 kref_put(&wdata->refcount,
2700                                          cifs_uncached_writedata_release);
2701                                 goto restart_loop;
2702                         }
2703                 }
2704                 list_del_init(&wdata->list);
2705                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2706         }
2707
2708         for (i = 0; i < ctx->npages; i++)
2709                 put_page(ctx->bv[i].bv_page);
2710
2711         cifs_stats_bytes_written(tcon, ctx->total_len);
2712         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2713
2714         ctx->rc = (rc == 0) ? ctx->total_len : rc;
2715
2716         mutex_unlock(&ctx->aio_mutex);
2717
2718         if (ctx->iocb && ctx->iocb->ki_complete)
2719                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2720         else
2721                 complete(&ctx->done);
2722 }
2723
2724 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2725 {
2726         struct file *file = iocb->ki_filp;
2727         ssize_t total_written = 0;
2728         struct cifsFileInfo *cfile;
2729         struct cifs_tcon *tcon;
2730         struct cifs_sb_info *cifs_sb;
2731         struct cifs_aio_ctx *ctx;
2732         struct iov_iter saved_from = *from;
2733         int rc;
2734
2735         /*
2736          * BB - optimize the way when signing is disabled. We can drop this
2737          * extra memory-to-memory copying and use iovec buffers for constructing
2738          * write request.
2739          */
2740
2741         rc = generic_write_checks(iocb, from);
2742         if (rc <= 0)
2743                 return rc;
2744
2745         cifs_sb = CIFS_FILE_SB(file);
2746         cfile = file->private_data;
2747         tcon = tlink_tcon(cfile->tlink);
2748
2749         if (!tcon->ses->server->ops->async_writev)
2750                 return -ENOSYS;
2751
2752         ctx = cifs_aio_ctx_alloc();
2753         if (!ctx)
2754                 return -ENOMEM;
2755
2756         ctx->cfile = cifsFileInfo_get(cfile);
2757
2758         if (!is_sync_kiocb(iocb))
2759                 ctx->iocb = iocb;
2760
2761         ctx->pos = iocb->ki_pos;
2762
2763         rc = setup_aio_ctx_iter(ctx, from, WRITE);
2764         if (rc) {
2765                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2766                 return rc;
2767         }
2768
2769         /* grab a lock here due to read response handlers can access ctx */
2770         mutex_lock(&ctx->aio_mutex);
2771
2772         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2773                                   cfile, cifs_sb, &ctx->list, ctx);
2774
2775         /*
2776          * If at least one write was successfully sent, then discard any rc
2777          * value from the later writes. If the other write succeeds, then
2778          * we'll end up returning whatever was written. If it fails, then
2779          * we'll get a new rc value from that.
2780          */
2781         if (!list_empty(&ctx->list))
2782                 rc = 0;
2783
2784         mutex_unlock(&ctx->aio_mutex);
2785
2786         if (rc) {
2787                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2788                 return rc;
2789         }
2790
2791         if (!is_sync_kiocb(iocb)) {
2792                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2793                 return -EIOCBQUEUED;
2794         }
2795
2796         rc = wait_for_completion_killable(&ctx->done);
2797         if (rc) {
2798                 mutex_lock(&ctx->aio_mutex);
2799                 ctx->rc = rc = -EINTR;
2800                 total_written = ctx->total_len;
2801                 mutex_unlock(&ctx->aio_mutex);
2802         } else {
2803                 rc = ctx->rc;
2804                 total_written = ctx->total_len;
2805         }
2806
2807         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2808
2809         if (unlikely(!total_written))
2810                 return rc;
2811
2812         iocb->ki_pos += total_written;
2813         return total_written;
2814 }
2815
2816 static ssize_t
2817 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2818 {
2819         struct file *file = iocb->ki_filp;
2820         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2821         struct inode *inode = file->f_mapping->host;
2822         struct cifsInodeInfo *cinode = CIFS_I(inode);
2823         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2824         ssize_t rc;
2825
2826         inode_lock(inode);
2827         /*
2828          * We need to hold the sem to be sure nobody modifies lock list
2829          * with a brlock that prevents writing.
2830          */
2831         down_read(&cinode->lock_sem);
2832
2833         rc = generic_write_checks(iocb, from);
2834         if (rc <= 0)
2835                 goto out;
2836
2837         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2838                                      server->vals->exclusive_lock_type, NULL,
2839                                      CIFS_WRITE_OP))
2840                 rc = __generic_file_write_iter(iocb, from);
2841         else
2842                 rc = -EACCES;
2843 out:
2844         up_read(&cinode->lock_sem);
2845         inode_unlock(inode);
2846
2847         if (rc > 0)
2848                 rc = generic_write_sync(iocb, rc);
2849         return rc;
2850 }
2851
2852 ssize_t
2853 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2854 {
2855         struct inode *inode = file_inode(iocb->ki_filp);
2856         struct cifsInodeInfo *cinode = CIFS_I(inode);
2857         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2858         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2859                                                 iocb->ki_filp->private_data;
2860         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2861         ssize_t written;
2862
2863         written = cifs_get_writer(cinode);
2864         if (written)
2865                 return written;
2866
2867         if (CIFS_CACHE_WRITE(cinode)) {
2868                 if (cap_unix(tcon->ses) &&
2869                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2870                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2871                         written = generic_file_write_iter(iocb, from);
2872                         goto out;
2873                 }
2874                 written = cifs_writev(iocb, from);
2875                 goto out;
2876         }
2877         /*
2878          * For non-oplocked files in strict cache mode we need to write the data
2879          * to the server exactly from the pos to pos+len-1 rather than flush all
2880          * affected pages because it may cause a error with mandatory locks on
2881          * these pages but not on the region from pos to ppos+len-1.
2882          */
2883         written = cifs_user_writev(iocb, from);
2884         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2885                 /*
2886                  * Windows 7 server can delay breaking level2 oplock if a write
2887                  * request comes - break it on the client to prevent reading
2888                  * an old data.
2889                  */
2890                 cifs_zap_mapping(inode);
2891                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2892                          inode);
2893                 cinode->oplock = 0;
2894         }
2895 out:
2896         cifs_put_writer(cinode);
2897         return written;
2898 }
2899
2900 static struct cifs_readdata *
2901 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2902 {
2903         struct cifs_readdata *rdata;
2904
2905         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2906                         GFP_KERNEL);
2907         if (rdata != NULL) {
2908                 kref_init(&rdata->refcount);
2909                 INIT_LIST_HEAD(&rdata->list);
2910                 init_completion(&rdata->done);
2911                 INIT_WORK(&rdata->work, complete);
2912         }
2913
2914         return rdata;
2915 }
2916
2917 void
2918 cifs_readdata_release(struct kref *refcount)
2919 {
2920         struct cifs_readdata *rdata = container_of(refcount,
2921                                         struct cifs_readdata, refcount);
2922
2923         if (rdata->cfile)
2924                 cifsFileInfo_put(rdata->cfile);
2925
2926         kfree(rdata);
2927 }
2928
2929 static int
2930 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2931 {
2932         int rc = 0;
2933         struct page *page;
2934         unsigned int i;
2935
2936         for (i = 0; i < nr_pages; i++) {
2937                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2938                 if (!page) {
2939                         rc = -ENOMEM;
2940                         break;
2941                 }
2942                 rdata->pages[i] = page;
2943         }
2944
2945         if (rc) {
2946                 for (i = 0; i < nr_pages; i++) {
2947                         put_page(rdata->pages[i]);
2948                         rdata->pages[i] = NULL;
2949                 }
2950         }
2951         return rc;
2952 }
2953
2954 static void
2955 cifs_uncached_readdata_release(struct kref *refcount)
2956 {
2957         struct cifs_readdata *rdata = container_of(refcount,
2958                                         struct cifs_readdata, refcount);
2959         unsigned int i;
2960
2961         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
2962         for (i = 0; i < rdata->nr_pages; i++) {
2963                 put_page(rdata->pages[i]);
2964                 rdata->pages[i] = NULL;
2965         }
2966         cifs_readdata_release(refcount);
2967 }
2968
2969 /**
2970  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2971  * @rdata:      the readdata response with list of pages holding data
2972  * @iter:       destination for our data
2973  *
2974  * This function copies data from a list of pages in a readdata response into
2975  * an array of iovecs. It will first calculate where the data should go
2976  * based on the info in the readdata and then copy the data into that spot.
2977  */
2978 static int
2979 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2980 {
2981         size_t remaining = rdata->got_bytes;
2982         unsigned int i;
2983
2984         for (i = 0; i < rdata->nr_pages; i++) {
2985                 struct page *page = rdata->pages[i];
2986                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2987                 size_t written;
2988
2989                 if (unlikely(iter->type & ITER_PIPE)) {
2990                         void *addr = kmap_atomic(page);
2991
2992                         written = copy_to_iter(addr, copy, iter);
2993                         kunmap_atomic(addr);
2994                 } else
2995                         written = copy_page_to_iter(page, 0, copy, iter);
2996                 remaining -= written;
2997                 if (written < copy && iov_iter_count(iter) > 0)
2998                         break;
2999         }
3000         return remaining ? -EFAULT : 0;
3001 }
3002
3003 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3004
3005 static void
3006 cifs_uncached_readv_complete(struct work_struct *work)
3007 {
3008         struct cifs_readdata *rdata = container_of(work,
3009                                                 struct cifs_readdata, work);
3010
3011         complete(&rdata->done);
3012         collect_uncached_read_data(rdata->ctx);
3013         /* the below call can possibly free the last ref to aio ctx */
3014         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3015 }
3016
3017 static int
3018 uncached_fill_pages(struct TCP_Server_Info *server,
3019                     struct cifs_readdata *rdata, struct iov_iter *iter,
3020                     unsigned int len)
3021 {
3022         int result = 0;
3023         unsigned int i;
3024         unsigned int nr_pages = rdata->nr_pages;
3025
3026         rdata->got_bytes = 0;
3027         rdata->tailsz = PAGE_SIZE;
3028         for (i = 0; i < nr_pages; i++) {
3029                 struct page *page = rdata->pages[i];
3030                 size_t n;
3031
3032                 if (len <= 0) {
3033                         /* no need to hold page hostage */
3034                         rdata->pages[i] = NULL;
3035                         rdata->nr_pages--;
3036                         put_page(page);
3037                         continue;
3038                 }
3039                 n = len;
3040                 if (len >= PAGE_SIZE) {
3041                         /* enough data to fill the page */
3042                         n = PAGE_SIZE;
3043                         len -= n;
3044                 } else {
3045                         zero_user(page, len, PAGE_SIZE - len);
3046                         rdata->tailsz = len;
3047                         len = 0;
3048                 }
3049                 if (iter)
3050                         result = copy_page_from_iter(page, 0, n, iter);
3051                 else
3052                         result = cifs_read_page_from_socket(server, page, n);
3053                 if (result < 0)
3054                         break;
3055
3056                 rdata->got_bytes += result;
3057         }
3058
3059         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3060                                                 rdata->got_bytes : result;
3061 }
3062
3063 static int
3064 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3065                               struct cifs_readdata *rdata, unsigned int len)
3066 {
3067         return uncached_fill_pages(server, rdata, NULL, len);
3068 }
3069
3070 static int
3071 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3072                               struct cifs_readdata *rdata,
3073                               struct iov_iter *iter)
3074 {
3075         return uncached_fill_pages(server, rdata, iter, iter->count);
3076 }
3077
3078 static int
3079 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3080                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3081                      struct cifs_aio_ctx *ctx)
3082 {
3083         struct cifs_readdata *rdata;
3084         unsigned int npages, rsize, credits;
3085         size_t cur_len;
3086         int rc;
3087         pid_t pid;
3088         struct TCP_Server_Info *server;
3089
3090         server = tlink_tcon(open_file->tlink)->ses->server;
3091
3092         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3093                 pid = open_file->pid;
3094         else
3095                 pid = current->tgid;
3096
3097         do {
3098                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3099                                                    &rsize, &credits);
3100                 if (rc)
3101                         break;
3102
3103                 cur_len = min_t(const size_t, len, rsize);
3104                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3105
3106                 /* allocate a readdata struct */
3107                 rdata = cifs_readdata_alloc(npages,
3108                                             cifs_uncached_readv_complete);
3109                 if (!rdata) {
3110                         add_credits_and_wake_if(server, credits, 0);
3111                         rc = -ENOMEM;
3112                         break;
3113                 }
3114
3115                 rc = cifs_read_allocate_pages(rdata, npages);
3116                 if (rc)
3117                         goto error;
3118
3119                 rdata->cfile = cifsFileInfo_get(open_file);
3120                 rdata->nr_pages = npages;
3121                 rdata->offset = offset;
3122                 rdata->bytes = cur_len;
3123                 rdata->pid = pid;
3124                 rdata->pagesz = PAGE_SIZE;
3125                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3126                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3127                 rdata->credits = credits;
3128                 rdata->ctx = ctx;
3129                 kref_get(&ctx->refcount);
3130
3131                 if (!rdata->cfile->invalidHandle ||
3132                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3133                         rc = server->ops->async_readv(rdata);
3134 error:
3135                 if (rc) {
3136                         add_credits_and_wake_if(server, rdata->credits, 0);
3137                         kref_put(&rdata->refcount,
3138                                  cifs_uncached_readdata_release);
3139                         if (rc == -EAGAIN)
3140                                 continue;
3141                         break;
3142                 }
3143
3144                 list_add_tail(&rdata->list, rdata_list);
3145                 offset += cur_len;
3146                 len -= cur_len;
3147         } while (len > 0);
3148
3149         return rc;
3150 }
3151
3152 static void
3153 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3154 {
3155         struct cifs_readdata *rdata, *tmp;
3156         struct iov_iter *to = &ctx->iter;
3157         struct cifs_sb_info *cifs_sb;
3158         struct cifs_tcon *tcon;
3159         unsigned int i;
3160         int rc;
3161
3162         tcon = tlink_tcon(ctx->cfile->tlink);
3163         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3164
3165         mutex_lock(&ctx->aio_mutex);
3166
3167         if (list_empty(&ctx->list)) {
3168                 mutex_unlock(&ctx->aio_mutex);
3169                 return;
3170         }
3171
3172         rc = ctx->rc;
3173         /* the loop below should proceed in the order of increasing offsets */
3174 again:
3175         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3176                 if (!rc) {
3177                         if (!try_wait_for_completion(&rdata->done)) {
3178                                 mutex_unlock(&ctx->aio_mutex);
3179                                 return;
3180                         }
3181
3182                         if (rdata->result == -EAGAIN) {
3183                                 /* resend call if it's a retryable error */
3184                                 struct list_head tmp_list;
3185                                 unsigned int got_bytes = rdata->got_bytes;
3186
3187                                 list_del_init(&rdata->list);
3188                                 INIT_LIST_HEAD(&tmp_list);
3189
3190                                 /*
3191                                  * Got a part of data and then reconnect has
3192                                  * happened -- fill the buffer and continue
3193                                  * reading.
3194                                  */
3195                                 if (got_bytes && got_bytes < rdata->bytes) {
3196                                         rc = cifs_readdata_to_iov(rdata, to);
3197                                         if (rc) {
3198                                                 kref_put(&rdata->refcount,
3199                                                 cifs_uncached_readdata_release);
3200                                                 continue;
3201                                         }
3202                                 }
3203
3204                                 rc = cifs_send_async_read(
3205                                                 rdata->offset + got_bytes,
3206                                                 rdata->bytes - got_bytes,
3207                                                 rdata->cfile, cifs_sb,
3208                                                 &tmp_list, ctx);
3209
3210                                 list_splice(&tmp_list, &ctx->list);
3211
3212                                 kref_put(&rdata->refcount,
3213                                          cifs_uncached_readdata_release);
3214                                 goto again;
3215                         } else if (rdata->result)
3216                                 rc = rdata->result;
3217                         else
3218                                 rc = cifs_readdata_to_iov(rdata, to);
3219
3220                         /* if there was a short read -- discard anything left */
3221                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3222                                 rc = -ENODATA;
3223                 }
3224                 list_del_init(&rdata->list);
3225                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3226         }
3227
3228         for (i = 0; i < ctx->npages; i++) {
3229                 if (ctx->should_dirty)
3230                         set_page_dirty(ctx->bv[i].bv_page);
3231                 put_page(ctx->bv[i].bv_page);
3232         }
3233
3234         ctx->total_len = ctx->len - iov_iter_count(to);
3235
3236         cifs_stats_bytes_read(tcon, ctx->total_len);
3237
3238         /* mask nodata case */
3239         if (rc == -ENODATA)
3240                 rc = 0;
3241
3242         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3243
3244         mutex_unlock(&ctx->aio_mutex);
3245
3246         if (ctx->iocb && ctx->iocb->ki_complete)
3247                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3248         else
3249                 complete(&ctx->done);
3250 }
3251
3252 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3253 {
3254         struct file *file = iocb->ki_filp;
3255         ssize_t rc;
3256         size_t len;
3257         ssize_t total_read = 0;
3258         loff_t offset = iocb->ki_pos;
3259         struct cifs_sb_info *cifs_sb;
3260         struct cifs_tcon *tcon;
3261         struct cifsFileInfo *cfile;
3262         struct cifs_aio_ctx *ctx;
3263
3264         len = iov_iter_count(to);
3265         if (!len)
3266                 return 0;
3267
3268         cifs_sb = CIFS_FILE_SB(file);
3269         cfile = file->private_data;
3270         tcon = tlink_tcon(cfile->tlink);
3271
3272         if (!tcon->ses->server->ops->async_readv)
3273                 return -ENOSYS;
3274
3275         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3276                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3277
3278         ctx = cifs_aio_ctx_alloc();
3279         if (!ctx)
3280                 return -ENOMEM;
3281
3282         ctx->cfile = cifsFileInfo_get(cfile);
3283
3284         if (!is_sync_kiocb(iocb))
3285                 ctx->iocb = iocb;
3286
3287         if (to->type == ITER_IOVEC)
3288                 ctx->should_dirty = true;
3289
3290         rc = setup_aio_ctx_iter(ctx, to, READ);
3291         if (rc) {
3292                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3293                 return rc;
3294         }
3295
3296         len = ctx->len;
3297
3298         /* grab a lock here due to read response handlers can access ctx */
3299         mutex_lock(&ctx->aio_mutex);
3300
3301         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3302
3303         /* if at least one read request send succeeded, then reset rc */
3304         if (!list_empty(&ctx->list))
3305                 rc = 0;
3306
3307         mutex_unlock(&ctx->aio_mutex);
3308
3309         if (rc) {
3310                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3311                 return rc;
3312         }
3313
3314         if (!is_sync_kiocb(iocb)) {
3315                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3316                 return -EIOCBQUEUED;
3317         }
3318
3319         rc = wait_for_completion_killable(&ctx->done);
3320         if (rc) {
3321                 mutex_lock(&ctx->aio_mutex);
3322                 ctx->rc = rc = -EINTR;
3323                 total_read = ctx->total_len;
3324                 mutex_unlock(&ctx->aio_mutex);
3325         } else {
3326                 rc = ctx->rc;
3327                 total_read = ctx->total_len;
3328         }
3329
3330         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3331
3332         if (total_read) {
3333                 iocb->ki_pos += total_read;
3334                 return total_read;
3335         }
3336         return rc;
3337 }
3338
3339 ssize_t
3340 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3341 {
3342         struct inode *inode = file_inode(iocb->ki_filp);
3343         struct cifsInodeInfo *cinode = CIFS_I(inode);
3344         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3345         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3346                                                 iocb->ki_filp->private_data;
3347         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3348         int rc = -EACCES;
3349
3350         /*
3351          * In strict cache mode we need to read from the server all the time
3352          * if we don't have level II oplock because the server can delay mtime
3353          * change - so we can't make a decision about inode invalidating.
3354          * And we can also fail with pagereading if there are mandatory locks
3355          * on pages affected by this read but not on the region from pos to
3356          * pos+len-1.
3357          */
3358         if (!CIFS_CACHE_READ(cinode))
3359                 return cifs_user_readv(iocb, to);
3360
3361         if (cap_unix(tcon->ses) &&
3362             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3363             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3364                 return generic_file_read_iter(iocb, to);
3365
3366         /*
3367          * We need to hold the sem to be sure nobody modifies lock list
3368          * with a brlock that prevents reading.
3369          */
3370         down_read(&cinode->lock_sem);
3371         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3372                                      tcon->ses->server->vals->shared_lock_type,
3373                                      NULL, CIFS_READ_OP))
3374                 rc = generic_file_read_iter(iocb, to);
3375         up_read(&cinode->lock_sem);
3376         return rc;
3377 }
3378
3379 static ssize_t
3380 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3381 {
3382         int rc = -EACCES;
3383         unsigned int bytes_read = 0;
3384         unsigned int total_read;
3385         unsigned int current_read_size;
3386         unsigned int rsize;
3387         struct cifs_sb_info *cifs_sb;
3388         struct cifs_tcon *tcon;
3389         struct TCP_Server_Info *server;
3390         unsigned int xid;
3391         char *cur_offset;
3392         struct cifsFileInfo *open_file;
3393         struct cifs_io_parms io_parms;
3394         int buf_type = CIFS_NO_BUFFER;
3395         __u32 pid;
3396
3397         xid = get_xid();
3398         cifs_sb = CIFS_FILE_SB(file);
3399
3400         /* FIXME: set up handlers for larger reads and/or convert to async */
3401         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3402
3403         if (file->private_data == NULL) {
3404                 rc = -EBADF;
3405                 free_xid(xid);
3406                 return rc;
3407         }
3408         open_file = file->private_data;
3409         tcon = tlink_tcon(open_file->tlink);
3410         server = tcon->ses->server;
3411
3412         if (!server->ops->sync_read) {
3413                 free_xid(xid);
3414                 return -ENOSYS;
3415         }
3416
3417         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3418                 pid = open_file->pid;
3419         else
3420                 pid = current->tgid;
3421
3422         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3423                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3424
3425         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3426              total_read += bytes_read, cur_offset += bytes_read) {
3427                 do {
3428                         current_read_size = min_t(uint, read_size - total_read,
3429                                                   rsize);
3430                         /*
3431                          * For windows me and 9x we do not want to request more
3432                          * than it negotiated since it will refuse the read
3433                          * then.
3434                          */
3435                         if ((tcon->ses) && !(tcon->ses->capabilities &
3436                                 tcon->ses->server->vals->cap_large_files)) {
3437                                 current_read_size = min_t(uint,
3438                                         current_read_size, CIFSMaxBufSize);
3439                         }
3440                         if (open_file->invalidHandle) {
3441                                 rc = cifs_reopen_file(open_file, true);
3442                                 if (rc != 0)
3443                                         break;
3444                         }
3445                         io_parms.pid = pid;
3446                         io_parms.tcon = tcon;
3447                         io_parms.offset = *offset;
3448                         io_parms.length = current_read_size;
3449                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3450                                                     &bytes_read, &cur_offset,
3451                                                     &buf_type);
3452                 } while (rc == -EAGAIN);
3453
3454                 if (rc || (bytes_read == 0)) {
3455                         if (total_read) {
3456                                 break;
3457                         } else {
3458                                 free_xid(xid);
3459                                 return rc;
3460                         }
3461                 } else {
3462                         cifs_stats_bytes_read(tcon, total_read);
3463                         *offset += bytes_read;
3464                 }
3465         }
3466         free_xid(xid);
3467         return total_read;
3468 }
3469
3470 /*
3471  * If the page is mmap'ed into a process' page tables, then we need to make
3472  * sure that it doesn't change while being written back.
3473  */
3474 static int
3475 cifs_page_mkwrite(struct vm_fault *vmf)
3476 {
3477         struct page *page = vmf->page;
3478
3479         lock_page(page);
3480         return VM_FAULT_LOCKED;
3481 }
3482
3483 static const struct vm_operations_struct cifs_file_vm_ops = {
3484         .fault = filemap_fault,
3485         .map_pages = filemap_map_pages,
3486         .page_mkwrite = cifs_page_mkwrite,
3487 };
3488
3489 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3490 {
3491         int rc, xid;
3492         struct inode *inode = file_inode(file);
3493
3494         xid = get_xid();
3495
3496         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3497                 rc = cifs_zap_mapping(inode);
3498                 if (rc)
3499                         return rc;
3500         }
3501
3502         rc = generic_file_mmap(file, vma);
3503         if (rc == 0)
3504                 vma->vm_ops = &cifs_file_vm_ops;
3505         free_xid(xid);
3506         return rc;
3507 }
3508
3509 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3510 {
3511         int rc, xid;
3512
3513         xid = get_xid();
3514         rc = cifs_revalidate_file(file);
3515         if (rc) {
3516                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3517                          rc);
3518                 free_xid(xid);
3519                 return rc;
3520         }
3521         rc = generic_file_mmap(file, vma);
3522         if (rc == 0)
3523                 vma->vm_ops = &cifs_file_vm_ops;
3524         free_xid(xid);
3525         return rc;
3526 }
3527
3528 static void
3529 cifs_readv_complete(struct work_struct *work)
3530 {
3531         unsigned int i, got_bytes;
3532         struct cifs_readdata *rdata = container_of(work,
3533                                                 struct cifs_readdata, work);
3534
3535         got_bytes = rdata->got_bytes;
3536         for (i = 0; i < rdata->nr_pages; i++) {
3537                 struct page *page = rdata->pages[i];
3538
3539                 lru_cache_add_file(page);
3540
3541                 if (rdata->result == 0 ||
3542                     (rdata->result == -EAGAIN && got_bytes)) {
3543                         flush_dcache_page(page);
3544                         SetPageUptodate(page);
3545                 }
3546
3547                 unlock_page(page);
3548
3549                 if (rdata->result == 0 ||
3550                     (rdata->result == -EAGAIN && got_bytes))
3551                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3552
3553                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3554
3555                 put_page(page);
3556                 rdata->pages[i] = NULL;
3557         }
3558         kref_put(&rdata->refcount, cifs_readdata_release);
3559 }
3560
3561 static int
3562 readpages_fill_pages(struct TCP_Server_Info *server,
3563                      struct cifs_readdata *rdata, struct iov_iter *iter,
3564                      unsigned int len)
3565 {
3566         int result = 0;
3567         unsigned int i;
3568         u64 eof;
3569         pgoff_t eof_index;
3570         unsigned int nr_pages = rdata->nr_pages;
3571
3572         /* determine the eof that the server (probably) has */
3573         eof = CIFS_I(rdata->mapping->host)->server_eof;
3574         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3575         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3576
3577         rdata->got_bytes = 0;
3578         rdata->tailsz = PAGE_SIZE;
3579         for (i = 0; i < nr_pages; i++) {
3580                 struct page *page = rdata->pages[i];
3581                 size_t n = PAGE_SIZE;
3582
3583                 if (len >= PAGE_SIZE) {
3584                         len -= PAGE_SIZE;
3585                 } else if (len > 0) {
3586                         /* enough for partial page, fill and zero the rest */
3587                         zero_user(page, len, PAGE_SIZE - len);
3588                         n = rdata->tailsz = len;
3589                         len = 0;
3590                 } else if (page->index > eof_index) {
3591                         /*
3592                          * The VFS will not try to do readahead past the
3593                          * i_size, but it's possible that we have outstanding
3594                          * writes with gaps in the middle and the i_size hasn't
3595                          * caught up yet. Populate those with zeroed out pages
3596                          * to prevent the VFS from repeatedly attempting to
3597                          * fill them until the writes are flushed.
3598                          */
3599                         zero_user(page, 0, PAGE_SIZE);
3600                         lru_cache_add_file(page);
3601                         flush_dcache_page(page);
3602                         SetPageUptodate(page);
3603                         unlock_page(page);
3604                         put_page(page);
3605                         rdata->pages[i] = NULL;
3606                         rdata->nr_pages--;
3607                         continue;
3608                 } else {
3609                         /* no need to hold page hostage */
3610                         lru_cache_add_file(page);
3611                         unlock_page(page);
3612                         put_page(page);
3613                         rdata->pages[i] = NULL;
3614                         rdata->nr_pages--;
3615                         continue;
3616                 }
3617
3618                 if (iter)
3619                         result = copy_page_from_iter(page, 0, n, iter);
3620                 else
3621                         result = cifs_read_page_from_socket(server, page, n);
3622                 if (result < 0)
3623                         break;
3624
3625                 rdata->got_bytes += result;
3626         }
3627
3628         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3629                                                 rdata->got_bytes : result;
3630 }
3631
3632 static int
3633 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3634                                struct cifs_readdata *rdata, unsigned int len)
3635 {
3636         return readpages_fill_pages(server, rdata, NULL, len);
3637 }
3638
3639 static int
3640 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3641                                struct cifs_readdata *rdata,
3642                                struct iov_iter *iter)
3643 {
3644         return readpages_fill_pages(server, rdata, iter, iter->count);
3645 }
3646
3647 static int
3648 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3649                     unsigned int rsize, struct list_head *tmplist,
3650                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3651 {
3652         struct page *page, *tpage;
3653         unsigned int expected_index;
3654         int rc;
3655         gfp_t gfp = readahead_gfp_mask(mapping);
3656
3657         INIT_LIST_HEAD(tmplist);
3658
3659         page = list_entry(page_list->prev, struct page, lru);
3660
3661         /*
3662          * Lock the page and put it in the cache. Since no one else
3663          * should have access to this page, we're safe to simply set
3664          * PG_locked without checking it first.
3665          */
3666         __SetPageLocked(page);
3667         rc = add_to_page_cache_locked(page, mapping,
3668                                       page->index, gfp);
3669
3670         /* give up if we can't stick it in the cache */
3671         if (rc) {
3672                 __ClearPageLocked(page);
3673                 return rc;
3674         }
3675
3676         /* move first page to the tmplist */
3677         *offset = (loff_t)page->index << PAGE_SHIFT;
3678         *bytes = PAGE_SIZE;
3679         *nr_pages = 1;
3680         list_move_tail(&page->lru, tmplist);
3681
3682         /* now try and add more pages onto the request */
3683         expected_index = page->index + 1;
3684         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3685                 /* discontinuity ? */
3686                 if (page->index != expected_index)
3687                         break;
3688
3689                 /* would this page push the read over the rsize? */
3690                 if (*bytes + PAGE_SIZE > rsize)
3691                         break;
3692
3693                 __SetPageLocked(page);
3694                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3695                         __ClearPageLocked(page);
3696                         break;
3697                 }
3698                 list_move_tail(&page->lru, tmplist);
3699                 (*bytes) += PAGE_SIZE;
3700                 expected_index++;
3701                 (*nr_pages)++;
3702         }
3703         return rc;
3704 }
3705
3706 static int cifs_readpages(struct file *file, struct address_space *mapping,
3707         struct list_head *page_list, unsigned num_pages)
3708 {
3709         int rc;
3710         struct list_head tmplist;
3711         struct cifsFileInfo *open_file = file->private_data;
3712         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3713         struct TCP_Server_Info *server;
3714         pid_t pid;
3715
3716         /*
3717          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3718          * immediately if the cookie is negative
3719          *
3720          * After this point, every page in the list might have PG_fscache set,
3721          * so we will need to clean that up off of every page we don't use.
3722          */
3723         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3724                                          &num_pages);
3725         if (rc == 0)
3726                 return rc;
3727
3728         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3729                 pid = open_file->pid;
3730         else
3731                 pid = current->tgid;
3732
3733         rc = 0;
3734         server = tlink_tcon(open_file->tlink)->ses->server;
3735
3736         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3737                  __func__, file, mapping, num_pages);
3738
3739         /*
3740          * Start with the page at end of list and move it to private