// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
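		/*
		 * folio_lock() may sleep, so pause the XArray walk and drop
		 * the RCU read lock before taking the folio lock.
		 */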
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}

static inline int cifs_convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/*
		 * GENERIC_ALL is too much permission to request; it can cause
		 * unnecessary access denied on create.
		 */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

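	/*
	 * Fall back for any other O_ACCMODE value: request a conservative
	 * set of specific access rights rather than GENERIC_ALL.
	 */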
	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct POSIX match for the FILE_SUPERSEDE
 *	disposition (i.e. create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates an existing file
 *	rather than replacing it with a new one, as FILE_SUPERSEDE does
 *	(using the attributes / metadata passed in on the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag, and
 *	the read/write flags match reasonably.  O_LARGEFILE is irrelevant
 *	because largefile support is always used by this client.  The
 *	flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and
 *	O_NONBLOCK need further investigation.
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = fid,
	};

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc)
		return rc;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

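/*
 * Acquire lock_sem for writing by spinning on the trylock; sleeping in
 * down_write() here could deadlock with readers that hold lock_sem while
 * waiting on a server response (e.g. during reconnect).
 */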
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
				       struct tcon_link *tlink, __u32 oplock,
				       const char *symlink_target)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	if (symlink_target) {
		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
		if (!cfile->symlink_target) {
			kfree(fdlocks);
			kfree(cfile);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
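	/*
	 * A lease break may have arrived while our open was in flight; if
	 * the pending open recorded a new oplock level, honour that one.
	 */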
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file->symlink_target);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	if true, free the file info via the fileinfo_put_wq
 *		workqueue; false on the close and oplock break paths
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

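	/*
	 * Flush any oplock break work that is already running so it cannot
	 * touch the file info after we free it; remember whether a queued
	 * break was cancelled so it can be acknowledged below.
	 */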
	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;

		xid = get_xid();
		if (server->ops->close_getattr)
			server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	if (offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	struct cifs_open_info_data data = {};

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
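		/*
		 * Reuse the deferred-close handle only when the open flags
		 * match exactly; otherwise drop the reference that
		 * cifs_get_readable_path() took.
		 */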
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fall through to retry the open the old way on network
		 * i/o or DFS errors.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
				  xid, &data);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set the mode, which we could not set earlier due
		 * to problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (file->f_flags & O_DIRECT &&
	    (((file->f_flags & O_ACCMODE) == O_RDONLY) ||
	     file->f_flags & O_APPEND))
		cifs_invalidate_cache(file_inode(file),
				      FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	cifs_free_open_info(&data);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * We can not grab the rename sem here, because various ops, including
	 * some that already hold it, can end up causing writepage to get
	 * called. If the server was down, that means we end up here, and we
	 * can never tell whether the caller already holds the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * especially in the reconnect path it is important to retry
		 * hard.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = &cfile->fid,
		.reconnect = true,
	};

	/*
	 * We can not refresh the inode by passing in a file_info buf to be
	 * returned by ops->open and then calling get_inode_info with the
	 * returned buf, since the file might have write-behind data that
	 * needs to be flushed and the server's version of the file size can
	 * be stale. If we knew for sure that the inode was not dirty locally
	 * we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions) {
			rc = smb311_posix_get_inode_info(&inode, full_path,
							 NULL, inode->i_sb, xid);
		} else if (tcon->unix_ext) {
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		} else {
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
		}
	}
	/*
	 * Else we are already writing out data to the server and could
	 * deadlock if we tried to flush data. Since we do not know whether
	 * we have local data that would invalidate the current end of file
	 * on the server, we can not go to the server to get the new inode
	 * info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
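		/*
		 * Defer the SMB2 close only when deferred closes are enabled
		 * (closetimeo != 0), we hold a granted RHW caching lease, and
		 * no byte-range lock forced close-on-lock behaviour.
		 */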
		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
		    && cinode->lease_granted &&
		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
		    dclose) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode_set_mtime_to_ts(inode,
						      inode_set_ctime_current(inode));
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there was no pending work,
				 * mod_delayed_work queues new work and
				 * returns false. In that case, take an
				 * extra reference to avoid a use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file, *tmp;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen persistent handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

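	/*
	 * A lock can only conflict if its byte range overlaps ours; the
	 * remaining checks apply fid/pid/type compatibility rules for
	 * mandatory locking.
	 */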
	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
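		/*
		 * Sleep until cifs_del_lock_waiters() unlinks us from the
		 * conflicting lock's block list; an empty (self-linked)
		 * blist means we have been woken, so retry from the top.
		 */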
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}
1447
1448 int
1449 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1450 {
1451         unsigned int xid;
1452         int rc = 0, stored_rc;
1453         struct cifsLockInfo *li, *tmp;
1454         struct cifs_tcon *tcon;
1455         unsigned int num, max_num, max_buf;
1456         LOCKING_ANDX_RANGE *buf, *cur;
1457         static const int types[] = {
1458                 LOCKING_ANDX_LARGE_FILES,
1459                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1460         };
1461         int i;
1462
1463         xid = get_xid();
1464         tcon = tlink_tcon(cfile->tlink);
1465
1466         /*
1467          * Accessing maxBuf is racy with cifs_reconnect - need to store the
1468          * value and check it before use.
1469          */
1470         max_buf = tcon->ses->server->maxBuf;
1471         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1472                 free_xid(xid);
1473                 return -EINVAL;
1474         }
1475
1476         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1477                      PAGE_SIZE);
1478         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1479                         PAGE_SIZE);
1480         max_num = (max_buf - sizeof(struct smb_hdr)) /
1481                                                 sizeof(LOCKING_ANDX_RANGE);
1482         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1483         if (!buf) {
1484                 free_xid(xid);
1485                 return -ENOMEM;
1486         }
1487
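             /*
              * Each LOCKING_ANDX request carries a single lock type, so make
              * one pass for exclusive ranges and one for shared ranges,
              * batching up to max_num ranges per request.
              */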
1488         for (i = 0; i < 2; i++) {
1489                 cur = buf;
1490                 num = 0;
1491                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1492                         if (li->type != types[i])
1493                                 continue;
1494                         cur->Pid = cpu_to_le16(li->pid);
1495                         cur->LengthLow = cpu_to_le32((u32)li->length);
1496                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1497                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1498                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1499                         if (++num == max_num) {
1500                                 stored_rc = cifs_lockv(xid, tcon,
1501                                                        cfile->fid.netfid,
1502                                                        (__u8)li->type, 0, num,
1503                                                        buf);
1504                                 if (stored_rc)
1505                                         rc = stored_rc;
1506                                 cur = buf;
1507                                 num = 0;
1508                         } else
1509                                 cur++;
1510                 }
1511
1512                 if (num) {
1513                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1514                                                (__u8)types[i], 0, num, buf);
1515                         if (stored_rc)
1516                                 rc = stored_rc;
1517                 }
1518         }
1519
1520         kfree(buf);
1521         free_xid(xid);
1522         return rc;
1523 }
1524
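     /*
      * Map the opaque lock owner pointer to a 32-bit value, mixed with a
      * random per-module secret, that can be sent as the POSIX lock pid on
      * the wire without leaking kernel pointers.
      */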
1525 static __u32
1526 hash_lockowner(fl_owner_t owner)
1527 {
1528         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1529 }
1530 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1531
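     /*
      * Snapshot of one POSIX lock, taken while holding flc_lock, so that the
      * lock can be pushed to the server after the spinlock is dropped.
      */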
1532 struct lock_to_push {
1533         struct list_head llist;
1534         __u64 offset;
1535         __u64 length;
1536         __u32 pid;
1537         __u16 netfid;
1538         __u8 type;
1539 };
1540
1541 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1542 static int
1543 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1544 {
1545         struct inode *inode = d_inode(cfile->dentry);
1546         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1547         struct file_lock *flock;
1548         struct file_lock_context *flctx = locks_inode_context(inode);
1549         unsigned int count = 0, i;
1550         int rc = 0, xid, type;
1551         struct list_head locks_to_send, *el;
1552         struct lock_to_push *lck, *tmp;
1553         __u64 length;
1554
1555         xid = get_xid();
1556
1557         if (!flctx)
1558                 goto out;
1559
1560         spin_lock(&flctx->flc_lock);
1561         list_for_each(el, &flctx->flc_posix) {
1562                 count++;
1563         }
1564         spin_unlock(&flctx->flc_lock);
1565
1566         INIT_LIST_HEAD(&locks_to_send);
1567
1568         /*
1569          * Allocating count locks is enough because no FL_POSIX locks can be
1570          * added to the list while we hold cinode->lock_sem, which protects
1571          * the locking operations on this inode.
1572          */
1573         for (i = 0; i < count; i++) {
1574                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1575                 if (!lck) {
1576                         rc = -ENOMEM;
1577                         goto err_out;
1578                 }
1579                 list_add_tail(&lck->llist, &locks_to_send);
1580         }
1581
1582         el = locks_to_send.next;
1583         spin_lock(&flctx->flc_lock);
1584         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1585                 if (el == &locks_to_send) {
1586                         /*
1587                          * The list ended. We don't have enough allocated
1588                          * structures - something is really wrong.
1589                          */
1590                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1591                         break;
1592                 }
1593                 length = cifs_flock_len(flock);
1594                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1595                         type = CIFS_RDLCK;
1596                 else
1597                         type = CIFS_WRLCK;
1598                 lck = list_entry(el, struct lock_to_push, llist);
1599                 lck->pid = hash_lockowner(flock->fl_owner);
1600                 lck->netfid = cfile->fid.netfid;
1601                 lck->length = length;
1602                 lck->type = type;
1603                 lck->offset = flock->fl_start;
                     el = el->next;
1604         }
1605         spin_unlock(&flctx->flc_lock);
1606
1607         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1608                 int stored_rc;
1609
1610                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1611                                              lck->offset, lck->length, NULL,
1612                                              lck->type, 0);
1613                 if (stored_rc)
1614                         rc = stored_rc;
1615                 list_del(&lck->llist);
1616                 kfree(lck);
1617         }
1618
1619 out:
1620         free_xid(xid);
1621         return rc;
1622 err_out:
1623         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1624                 list_del(&lck->llist);
1625                 kfree(lck);
1626         }
1627         goto out;
1628 }
1629 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1630
1631 static int
1632 cifs_push_locks(struct cifsFileInfo *cfile)
1633 {
1634         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1635         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1636         int rc = 0;
1637 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1638         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1639 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1640
1641         /* we are going to update can_cache_brlcks here - need write access */
1642         cifs_down_write(&cinode->lock_sem);
1643         if (!cinode->can_cache_brlcks) {
1644                 up_write(&cinode->lock_sem);
1645                 return rc;
1646         }
1647
1648 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1649         if (cap_unix(tcon->ses) &&
1650             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1651             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1652                 rc = cifs_push_posix_locks(cfile);
1653         else
1654 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1655                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1656
1657         cinode->can_cache_brlcks = false;
1658         up_write(&cinode->lock_sem);
1659         return rc;
1660 }
1661
1662 static void
1663 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1664                 bool *wait_flag, struct TCP_Server_Info *server)
1665 {
1666         if (flock->fl_flags & FL_POSIX)
1667                 cifs_dbg(FYI, "Posix\n");
1668         if (flock->fl_flags & FL_FLOCK)
1669                 cifs_dbg(FYI, "Flock\n");
1670         if (flock->fl_flags & FL_SLEEP) {
1671                 cifs_dbg(FYI, "Blocking lock\n");
1672                 *wait_flag = true;
1673         }
1674         if (flock->fl_flags & FL_ACCESS)
1675                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1676         if (flock->fl_flags & FL_LEASE)
1677                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1678         if (flock->fl_flags &
1679             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1680                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1681                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1682
1683         *type = server->vals->large_lock_type;
1684         if (flock->fl_type == F_WRLCK) {
1685                 cifs_dbg(FYI, "F_WRLCK\n");
1686                 *type |= server->vals->exclusive_lock_type;
1687                 *lock = 1;
1688         } else if (flock->fl_type == F_UNLCK) {
1689                 cifs_dbg(FYI, "F_UNLCK\n");
1690                 *type |= server->vals->unlock_lock_type;
1691                 *unlock = 1;
1692                 /* Check if unlock includes more than one lock range */
1693         } else if (flock->fl_type == F_RDLCK) {
1694                 cifs_dbg(FYI, "F_RDLCK\n");
1695                 *type |= server->vals->shared_lock_type;
1696                 *lock = 1;
1697         } else if (flock->fl_type == F_EXLCK) {
1698                 cifs_dbg(FYI, "F_EXLCK\n");
1699                 *type |= server->vals->exclusive_lock_type;
1700                 *lock = 1;
1701         } else if (flock->fl_type == F_SHLCK) {
1702                 cifs_dbg(FYI, "F_SHLCK\n");
1703                 *type |= server->vals->shared_lock_type;
1704                 *lock = 1;
1705         } else
1706                 cifs_dbg(FYI, "Unknown type of lock\n");
1707 }
1708
1709 static int
1710 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1711            bool wait_flag, bool posix_lck, unsigned int xid)
1712 {
1713         int rc = 0;
1714         __u64 length = cifs_flock_len(flock);
1715         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1716         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1717         struct TCP_Server_Info *server = tcon->ses->server;
1718 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1719         __u16 netfid = cfile->fid.netfid;
1720
1721         if (posix_lck) {
1722                 int posix_lock_type;
1723
1724                 rc = cifs_posix_lock_test(file, flock);
1725                 if (!rc)
1726                         return rc;
1727
1728                 if (type & server->vals->shared_lock_type)
1729                         posix_lock_type = CIFS_RDLCK;
1730                 else
1731                         posix_lock_type = CIFS_WRLCK;
1732                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1733                                       hash_lockowner(flock->fl_owner),
1734                                       flock->fl_start, length, flock,
1735                                       posix_lock_type, wait_flag);
1736                 return rc;
1737         }
1738 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1739
1740         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1741         if (!rc)
1742                 return rc;
1743
1744         /* BB we could chain these into one lock request BB */
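             /*
              * Probe the range by trying to take the lock on the server:
              * success means there is no conflicting lock, so immediately
              * release the probe lock and report F_UNLCK.
              */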
1745         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1746                                     1, 0, false);
1747         if (rc == 0) {
1748                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1749                                             type, 0, 1, false);
1750                 flock->fl_type = F_UNLCK;
1751                 if (rc != 0)
1752                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1753                                  rc);
1754                 return 0;
1755         }
1756
1757         if (type & server->vals->shared_lock_type) {
1758                 flock->fl_type = F_WRLCK;
1759                 return 0;
1760         }
1761
1762         type &= ~server->vals->exclusive_lock_type;
1763
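             /*
              * The exclusive probe failed; retry with a shared probe to tell
              * whether the conflicting lock is itself shared (report F_RDLCK)
              * or exclusive (report F_WRLCK).
              */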
1764         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1765                                     type | server->vals->shared_lock_type,
1766                                     1, 0, false);
1767         if (rc == 0) {
1768                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1769                         type | server->vals->shared_lock_type, 0, 1, false);
1770                 flock->fl_type = F_RDLCK;
1771                 if (rc != 0)
1772                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1773                                  rc);
1774         } else
1775                 flock->fl_type = F_WRLCK;
1776
1777         return 0;
1778 }
1779
1780 void
1781 cifs_move_llist(struct list_head *source, struct list_head *dest)
1782 {
1783         struct list_head *li, *tmp;
1784         list_for_each_safe(li, tmp, source)
1785                 list_move(li, dest);
1786 }
1787
1788 void
1789 cifs_free_llist(struct list_head *llist)
1790 {
1791         struct cifsLockInfo *li, *tmp;
1792         list_for_each_entry_safe(li, tmp, llist, llist) {
1793                 cifs_del_lock_waiters(li);
1794                 list_del(&li->llist);
1795                 kfree(li);
1796         }
1797 }
1798
1799 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1800 int
1801 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1802                   unsigned int xid)
1803 {
1804         int rc = 0, stored_rc;
1805         static const int types[] = {
1806                 LOCKING_ANDX_LARGE_FILES,
1807                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1808         };
1809         unsigned int i;
1810         unsigned int max_num, num, max_buf;
1811         LOCKING_ANDX_RANGE *buf, *cur;
1812         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1813         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1814         struct cifsLockInfo *li, *tmp;
1815         __u64 length = cifs_flock_len(flock);
1816         struct list_head tmp_llist;
1817
1818         INIT_LIST_HEAD(&tmp_llist);
1819
1820         /*
1821          * Accessing maxBuf is racy with cifs_reconnect - need to store the
1822          * value and check it before use.
1823          */
1824         max_buf = tcon->ses->server->maxBuf;
1825         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1826                 return -EINVAL;
1827
1828         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1829                      PAGE_SIZE);
1830         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1831                         PAGE_SIZE);
1832         max_num = (max_buf - sizeof(struct smb_hdr)) /
1833                                                 sizeof(LOCKING_ANDX_RANGE);
1834         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1835         if (!buf)
1836                 return -ENOMEM;
1837
1838         cifs_down_write(&cinode->lock_sem);
1839         for (i = 0; i < 2; i++) {
1840                 cur = buf;
1841                 num = 0;
1842                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1843                         if (flock->fl_start > li->offset ||
1844                             (flock->fl_start + length) <
1845                             (li->offset + li->length))
1846                                 continue;
1847                         if (current->tgid != li->pid)
1848                                 continue;
1849                         if (types[i] != li->type)
1850                                 continue;
1851                         if (cinode->can_cache_brlcks) {
1852                                 /*
1853                                  * We can cache brlock requests - simply remove
1854                                  * the lock from the file's list.
1855                                  */
1856                                 list_del(&li->llist);
1857                                 cifs_del_lock_waiters(li);
1858                                 kfree(li);
1859                                 continue;
1860                         }
1861                         cur->Pid = cpu_to_le16(li->pid);
1862                         cur->LengthLow = cpu_to_le32((u32)li->length);
1863                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1864                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1865                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1866                         /*
1867                          * We need to save the lock here so we can add it back to
1868                          * the file's list if the unlock range request fails on
1869                          * the server.
1870                          */
1871                         list_move(&li->llist, &tmp_llist);
1872                         if (++num == max_num) {
1873                                 stored_rc = cifs_lockv(xid, tcon,
1874                                                        cfile->fid.netfid,
1875                                                        li->type, num, 0, buf);
1876                                 if (stored_rc) {
1877                                         /*
1878                                          * We failed on the unlock range
1879                                          * request - add all locks from the tmp
1880                                          * list to the head of the file's list.
1881                                          */
1882                                         cifs_move_llist(&tmp_llist,
1883                                                         &cfile->llist->locks);
1884                                         rc = stored_rc;
1885                                 } else
1886                                         /*
1887                                          * The unlock range request succeeded -
1888                                          * free the tmp list.
1889                                          */
1890                                         cifs_free_llist(&tmp_llist);
1891                                 cur = buf;
1892                                 num = 0;
1893                         } else
1894                                 cur++;
1895                 }
1896                 if (num) {
1897                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1898                                                types[i], num, 0, buf);
1899                         if (stored_rc) {
1900                                 cifs_move_llist(&tmp_llist,
1901                                                 &cfile->llist->locks);
1902                                 rc = stored_rc;
1903                         } else
1904                                 cifs_free_llist(&tmp_llist);
1905                 }
1906         }
1907
1908         up_write(&cinode->lock_sem);
1909         kfree(buf);
1910         return rc;
1911 }
1912 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1913
1914 static int
1915 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1916            bool wait_flag, bool posix_lck, int lock, int unlock,
1917            unsigned int xid)
1918 {
1919         int rc = 0;
1920         __u64 length = cifs_flock_len(flock);
1921         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1922         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1923         struct TCP_Server_Info *server = tcon->ses->server;
1924         struct inode *inode = d_inode(cfile->dentry);
1925
1926 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1927         if (posix_lck) {
1928                 int posix_lock_type;
1929
1930                 rc = cifs_posix_lock_set(file, flock);
1931                 if (rc <= FILE_LOCK_DEFERRED)
1932                         return rc;
1933
1934                 if (type & server->vals->shared_lock_type)
1935                         posix_lock_type = CIFS_RDLCK;
1936                 else
1937                         posix_lock_type = CIFS_WRLCK;
1938
1939                 if (unlock == 1)
1940                         posix_lock_type = CIFS_UNLCK;
1941
1942                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1943                                       hash_lockowner(flock->fl_owner),
1944                                       flock->fl_start, length,
1945                                       NULL, posix_lock_type, wait_flag);
1946                 goto out;
1947         }
1948 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1949         if (lock) {
1950                 struct cifsLockInfo *lock;
1951
1952                 lock = cifs_lock_init(flock->fl_start, length, type,
1953                                       flock->fl_flags);
1954                 if (!lock)
1955                         return -ENOMEM;
1956
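                     /*
                      * rc == 0: lock recorded locally (cached); rc == 1: no local
                      * conflict but the server must still be asked; rc < 0:
                      * blocked by a conflicting lock or interrupted while waiting.
                      */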
1957                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1958                 if (rc < 0) {
1959                         kfree(lock);
1960                         return rc;
1961                 }
1962                 if (!rc)
1963                         goto out;
1964
1965                 /*
1966                  * A Windows 7 server can delay breaking a lease from read to
1967                  * None when we set a byte-range lock on a file - break it
1968                  * explicitly before sending the lock to the server to be sure
1969                  * the next read won't conflict with non-overlapping locks due
1970                  * to page reading.
1971                  */
1972                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1973                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1974                         cifs_zap_mapping(inode);
1975                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1976                                  inode);
1977                         CIFS_I(inode)->oplock = 0;
1978                 }
1979
1980                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1981                                             type, 1, 0, wait_flag);
1982                 if (rc) {
1983                         kfree(lock);
1984                         return rc;
1985                 }
1986
1987                 cifs_lock_add(cfile, lock);
1988         } else if (unlock)
1989                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1990
1991 out:
1992         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1993                 /*
1994                  * If this is a request to remove all locks because we
1995                  * are closing the file, it doesn't matter if the
1996                  * unlocking failed as both cifs.ko and the SMB server
1997                  * remove the lock on file close.
1998                  */
1999                 if (rc) {
2000                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2001                         if (!(flock->fl_flags & FL_CLOSE))
2002                                 return rc;
2003                 }
2004                 rc = locks_lock_file_wait(file, flock);
2005         }
2006         return rc;
2007 }
2008
2009 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2010 {
2011         int rc, xid;
2012         int lock = 0, unlock = 0;
2013         bool wait_flag = false;
2014         bool posix_lck = false;
2015         struct cifs_sb_info *cifs_sb;
2016         struct cifs_tcon *tcon;
2017         struct cifsFileInfo *cfile;
2018         __u32 type;
2019
2020         xid = get_xid();
2021
2022         if (!(fl->fl_flags & FL_FLOCK)) {
2023                 rc = -ENOLCK;
2024                 free_xid(xid);
2025                 return rc;
2026         }
2027
2028         cfile = (struct cifsFileInfo *)file->private_data;
2029         tcon = tlink_tcon(cfile->tlink);
2030
2031         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2032                         tcon->ses->server);
2033         cifs_sb = CIFS_FILE_SB(file);
2034
2035         if (cap_unix(tcon->ses) &&
2036             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2037             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2038                 posix_lck = true;
2039
2040         if (!lock && !unlock) {
2041                 /*
2042                  * if neither a lock nor an unlock was requested then there is
2043                  * nothing to do since we do not know what the request is
2044                  */
2045                 rc = -EOPNOTSUPP;
2046                 free_xid(xid);
2047                 return rc;
2048         }
2049
2050         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2051                         xid);
2052         free_xid(xid);
2053         return rc;
2056 }
2057
2058 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2059 {
2060         int rc, xid;
2061         int lock = 0, unlock = 0;
2062         bool wait_flag = false;
2063         bool posix_lck = false;
2064         struct cifs_sb_info *cifs_sb;
2065         struct cifs_tcon *tcon;
2066         struct cifsFileInfo *cfile;
2067         __u32 type;
2068
2069         rc = -EACCES;
2070         xid = get_xid();
2071
2072         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2073                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2074                  (long long)flock->fl_end);
2075
2076         cfile = (struct cifsFileInfo *)file->private_data;
2077         tcon = tlink_tcon(cfile->tlink);
2078
2079         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2080                         tcon->ses->server);
2081         cifs_sb = CIFS_FILE_SB(file);
2082         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2083
2084         if (cap_unix(tcon->ses) &&
2085             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2086             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2087                 posix_lck = true;
2088         /*
2089          * BB add code here to normalize offset and length to account for
2090          * negative length which we can not accept over the wire.
2091          */
2092         if (IS_GETLK(cmd)) {
2093                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2094                 free_xid(xid);
2095                 return rc;
2096         }
2097
2098         if (!lock && !unlock) {
2099                 /*
2100                  * if neither a lock nor an unlock was requested then there is
2101                  * nothing to do since we do not know what the request is
2102                  */
2103                 free_xid(xid);
2104                 return -EOPNOTSUPP;
2105         }
2106
2107         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2108                         xid);
2109         free_xid(xid);
2110         return rc;
2111 }
2112
2113 /*
2114  * Update the file size (if needed) after a write. Should be called with
2115  * inode->i_lock held.
2116  */
2117 void
2118 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2119                       unsigned int bytes_written)
2120 {
2121         loff_t end_of_write = offset + bytes_written;
2122
2123         if (end_of_write > cifsi->netfs.remote_i_size)
2124                 netfs_resize_file(&cifsi->netfs, end_of_write, true);
2125 }
2126
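     /*
      * A typical caller pattern (a sketch; cifs_write() below does exactly
      * this):
      *
      *         spin_lock(&d_inode(dentry)->i_lock);
      *         cifs_update_eof(cifsi, *offset, bytes_written);
      *         spin_unlock(&d_inode(dentry)->i_lock);
      */
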
2127 static ssize_t
2128 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2129            size_t write_size, loff_t *offset)
2130 {
2131         int rc = 0;
2132         unsigned int bytes_written = 0;
2133         unsigned int total_written;
2134         struct cifs_tcon *tcon;
2135         struct TCP_Server_Info *server;
2136         unsigned int xid;
2137         struct dentry *dentry = open_file->dentry;
2138         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2139         struct cifs_io_parms io_parms = {0};
2140
2141         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2142                  write_size, *offset, dentry);
2143
2144         tcon = tlink_tcon(open_file->tlink);
2145         server = tcon->ses->server;
2146
2147         if (!server->ops->sync_write)
2148                 return -ENOSYS;
2149
2150         xid = get_xid();
2151
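             /*
              * Write the buffer in chunks of at most wp_retry_size bytes,
              * retrying each chunk while the server returns -EAGAIN and
              * reopening the handle if it has been invalidated.
              */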
2152         for (total_written = 0; write_size > total_written;
2153              total_written += bytes_written) {
2154                 rc = -EAGAIN;
2155                 while (rc == -EAGAIN) {
2156                         struct kvec iov[2];
2157                         unsigned int len;
2158
2159                         if (open_file->invalidHandle) {
2160                                 /* we could deadlock if we called
2161                                    filemap_fdatawait from here, so tell
2162                                    cifs_reopen_file not to flush data to
2163                                    the server now */
2164                                 rc = cifs_reopen_file(open_file, false);
2165                                 if (rc != 0)
2166                                         break;
2167                         }
2168
2169                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2170                                   (unsigned int)write_size - total_written);
2171                         /* iov[0] is reserved for smb header */
2172                         iov[1].iov_base = (char *)write_data + total_written;
2173                         iov[1].iov_len = len;
2174                         io_parms.pid = pid;
2175                         io_parms.tcon = tcon;
2176                         io_parms.offset = *offset;
2177                         io_parms.length = len;
2178                         rc = server->ops->sync_write(xid, &open_file->fid,
2179                                         &io_parms, &bytes_written, iov, 1);
2180                 }
2181                 if (rc || (bytes_written == 0)) {
2182                         if (total_written)
2183                                 break;
2184                         else {
2185                                 free_xid(xid);
2186                                 return rc;
2187                         }
2188                 } else {
2189                         spin_lock(&d_inode(dentry)->i_lock);
2190                         cifs_update_eof(cifsi, *offset, bytes_written);
2191                         spin_unlock(&d_inode(dentry)->i_lock);
2192                         *offset += bytes_written;
2193                 }
2194         }
2195
2196         cifs_stats_bytes_written(tcon, total_written);
2197
2198         if (total_written > 0) {
2199                 spin_lock(&d_inode(dentry)->i_lock);
2200                 if (*offset > d_inode(dentry)->i_size) {
2201                         i_size_write(d_inode(dentry), *offset);
2202                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2203                 }
2204                 spin_unlock(&d_inode(dentry)->i_lock);
2205         }
2206         mark_inode_dirty_sync(d_inode(dentry));
2207         free_xid(xid);
2208         return total_written;
2209 }
2210
2211 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2212                                         bool fsuid_only)
2213 {
2214         struct cifsFileInfo *open_file = NULL;
2215         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2216
2217         /* only filter by fsuid on multiuser mounts */
2218         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2219                 fsuid_only = false;
2220
2221         spin_lock(&cifs_inode->open_file_lock);
2222         /* we could simply take the first list entry, since write-only entries
2223            are always at the end of the list; but the first entry might have
2224            a close pending, so we go through the whole list */
2225         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2226                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2227                         continue;
2228                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2229                         if (!open_file->invalidHandle) {
2230                                 /* found a good file */
2231                                 /* lock it so it will not be closed on us */
2232                                 cifsFileInfo_get(open_file);
2233                                 spin_unlock(&cifs_inode->open_file_lock);
2234                                 return open_file;
2235                         } /* else might as well continue, and look for
2236                              another, or simply have the caller reopen it
2237                              again rather than trying to fix this handle */
2238                 } else /* write only file */
2239                         break; /* write only files are last so must be done */
2240         }
2241         spin_unlock(&cifs_inode->open_file_lock);
2242         return NULL;
2243 }
2244
2245 /* Return -EBADF if no handle is found and general rc otherwise */
2246 int
2247 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2248                        struct cifsFileInfo **ret_file)
2249 {
2250         struct cifsFileInfo *open_file, *inv_file = NULL;
2251         struct cifs_sb_info *cifs_sb;
2252         bool any_available = false;
2253         int rc = -EBADF;
2254         unsigned int refind = 0;
2255         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2256         bool with_delete = flags & FIND_WR_WITH_DELETE;
2257         *ret_file = NULL;
2258
2259         /*
2260          * Having a null inode here (because mapping->host was set to zero by
2261          * the VFS or MM) should not happen but we had reports of an oops (due
2262          * to it being zero) during stress testcases, so we need to check for it.
2263          */
2264
2265         if (cifs_inode == NULL) {
2266                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2267                 dump_stack();
2268                 return rc;
2269         }
2270
2271         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2272
2273         /* only filter by fsuid on multiuser mounts */
2274         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2275                 fsuid_only = false;
2276
2277         spin_lock(&cifs_inode->open_file_lock);
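             /*
              * Scan the open handles, preferring a valid handle owned by the
              * calling process; retry at most MAX_REOPEN_ATT times while
              * attempting to reopen an invalidated handle.
              */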
2278 refind_writable:
2279         if (refind > MAX_REOPEN_ATT) {
2280                 spin_unlock(&cifs_inode->open_file_lock);
2281                 return rc;
2282         }
2283         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2284                 if (!any_available && open_file->pid != current->tgid)
2285                         continue;
2286                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2287                         continue;
2288                 if (with_delete && !(open_file->fid.access & DELETE))
2289                         continue;
2290                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2291                         if (!open_file->invalidHandle) {
2292                                 /* found a good writable file */
2293                                 cifsFileInfo_get(open_file);
2294                                 spin_unlock(&cifs_inode->open_file_lock);
2295                                 *ret_file = open_file;
2296                                 return 0;
2297                         } else {
2298                                 if (!inv_file)
2299                                         inv_file = open_file;
2300                         }
2301                 }
2302         }
2303         /* couldn't find a usable FH with the same pid, try any available */
2304         if (!any_available) {
2305                 any_available = true;
2306                 goto refind_writable;
2307         }
2308
2309         if (inv_file) {
2310                 any_available = false;
2311                 cifsFileInfo_get(inv_file);
2312         }
2313
2314         spin_unlock(&cifs_inode->open_file_lock);
2315
2316         if (inv_file) {
2317                 rc = cifs_reopen_file(inv_file, false);
2318                 if (!rc) {
2319                         *ret_file = inv_file;
2320                         return 0;
2321                 }
2322
2323                 spin_lock(&cifs_inode->open_file_lock);
2324                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2325                 spin_unlock(&cifs_inode->open_file_lock);
2326                 cifsFileInfo_put(inv_file);
2327                 ++refind;
2328                 inv_file = NULL;
2329                 spin_lock(&cifs_inode->open_file_lock);
2330                 goto refind_writable;
2331         }
2332
2333         return rc;
2334 }
2335
2336 struct cifsFileInfo *
2337 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2338 {
2339         struct cifsFileInfo *cfile;
2340         int rc;
2341
2342         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2343         if (rc)
2344                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2345
2346         return cfile;
2347 }
2348
2349 int
2350 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2351                        int flags,
2352                        struct cifsFileInfo **ret_file)
2353 {
2354         struct cifsFileInfo *cfile;
2355         void *page = alloc_dentry_path();
2356
2357         *ret_file = NULL;
2358
2359         spin_lock(&tcon->open_file_lock);
2360         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2361                 struct cifsInodeInfo *cinode;
2362                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2363                 if (IS_ERR(full_path)) {
2364                         spin_unlock(&tcon->open_file_lock);
2365                         free_dentry_path(page);
2366                         return PTR_ERR(full_path);
2367                 }
2368                 if (strcmp(full_path, name))
2369                         continue;
2370
2371                 cinode = CIFS_I(d_inode(cfile->dentry));
2372                 spin_unlock(&tcon->open_file_lock);
2373                 free_dentry_path(page);
2374                 return cifs_get_writable_file(cinode, flags, ret_file);
2375         }
2376
2377         spin_unlock(&tcon->open_file_lock);
2378         free_dentry_path(page);
2379         return -ENOENT;
2380 }
2381
2382 int
2383 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2384                        struct cifsFileInfo **ret_file)
2385 {
2386         struct cifsFileInfo *cfile;
2387         void *page = alloc_dentry_path();
2388
2389         *ret_file = NULL;
2390
2391         spin_lock(&tcon->open_file_lock);
2392         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2393                 struct cifsInodeInfo *cinode;
2394                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2395                 if (IS_ERR(full_path)) {
2396                         spin_unlock(&tcon->open_file_lock);
2397                         free_dentry_path(page);
2398                         return PTR_ERR(full_path);
2399                 }
2400                 if (strcmp(full_path, name))
2401                         continue;
2402
2403                 cinode = CIFS_I(d_inode(cfile->dentry));
2404                 spin_unlock(&tcon->open_file_lock);
2405                 free_dentry_path(page);
2406                 *ret_file = find_readable_file(cinode, 0);
2407                 return *ret_file ? 0 : -ENOENT;
2408         }
2409
2410         spin_unlock(&tcon->open_file_lock);
2411         free_dentry_path(page);
2412         return -ENOENT;
2413 }
2414
2415 void
2416 cifs_writedata_release(struct kref *refcount)
2417 {
2418         struct cifs_writedata *wdata = container_of(refcount,
2419                                         struct cifs_writedata, refcount);
2420 #ifdef CONFIG_CIFS_SMB_DIRECT
2421         if (wdata->mr) {
2422                 smbd_deregister_mr(wdata->mr);
2423                 wdata->mr = NULL;
2424         }
2425 #endif
2426
2427         if (wdata->cfile)
2428                 cifsFileInfo_put(wdata->cfile);
2429
2430         kfree(wdata);
2431 }
2432
2433 /*
2434  * Write failed with a retryable error. Resend the write request. It's also
2435  * possible that the pages were redirtied, so re-clean them.
2436  */
2437 static void
2438 cifs_writev_requeue(struct cifs_writedata *wdata)
2439 {
2440         int rc = 0;
2441         struct inode *inode = d_inode(wdata->cfile->dentry);
2442         struct TCP_Server_Info *server;
2443         unsigned int rest_len = wdata->bytes;
2444         loff_t fpos = wdata->offset;
2445
2446         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2447         do {
2448                 struct cifs_writedata *wdata2;
2449                 unsigned int wsize, cur_len;
2450
2451                 wsize = server->ops->wp_retry_size(inode);
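                     /*
                      * Resend in chunks: each chunk is at most the server's
                      * current retry write size and, when the write must be
                      * split, is rounded down to a whole number of pages.
                      */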
2452                 if (wsize < rest_len) {
2453                         if (wsize < PAGE_SIZE) {
2454                                 rc = -EOPNOTSUPP;
2455                                 break;
2456                         }
2457                         cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2458                 } else {
2459                         cur_len = rest_len;
2460                 }
2461
2462                 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2463                 if (!wdata2) {
2464                         rc = -ENOMEM;
2465                         break;
2466                 }
2467
2468                 wdata2->sync_mode = wdata->sync_mode;
2469                 wdata2->offset  = fpos;
2470                 wdata2->bytes   = cur_len;
2471                 wdata2->iter    = wdata->iter;
2472
2473                 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2474                 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2475
2476                 if (iov_iter_is_xarray(&wdata2->iter))
2477                         /* Check for pages having been redirtied and clean
2478                          * them.  We can do this by walking the xarray.  If
2479                          * it's not an xarray, then it's a DIO and we shouldn't
2480                          * be mucking around with the page bits.
2481                          */
2482                         cifs_undirty_folios(inode, fpos, cur_len);
2483
2484                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2485                                             &wdata2->cfile);
2486                 if (!wdata2->cfile) {
2487                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2488                                  rc);
2489                         if (!is_retryable_error(rc))
2490                                 rc = -EBADF;
2491                 } else {
2492                         wdata2->pid = wdata2->cfile->pid;
2493                         rc = server->ops->async_writev(wdata2,
2494                                                        cifs_writedata_release);
2495                 }
2496
2497                 kref_put(&wdata2->refcount, cifs_writedata_release);
2498                 if (rc) {
2499                         if (is_retryable_error(rc))
2500                                 continue;
2501                         fpos += cur_len;
2502                         rest_len -= cur_len;
2503                         break;
2504                 }
2505
2506                 fpos += cur_len;
2507                 rest_len -= cur_len;
2508         } while (rest_len > 0);
2509
2510         /* Clean up remaining pages from the original wdata */
2511         if (iov_iter_is_xarray(&wdata->iter))
2512                 cifs_pages_write_failed(inode, fpos, rest_len);
2513
2514         if (rc != 0 && !is_retryable_error(rc))
2515                 mapping_set_error(inode->i_mapping, rc);
2516         kref_put(&wdata->refcount, cifs_writedata_release);
2517 }
2518
2519 void
2520 cifs_writev_complete(struct work_struct *work)
2521 {
2522         struct cifs_writedata *wdata = container_of(work,
2523                                                 struct cifs_writedata, work);
2524         struct inode *inode = d_inode(wdata->cfile->dentry);
2525
2526         if (wdata->result == 0) {
2527                 spin_lock(&inode->i_lock);
2528                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2529                 spin_unlock(&inode->i_lock);
2530                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2531                                          wdata->bytes);
2532         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2533                 return cifs_writev_requeue(wdata);
2534
2535         if (wdata->result == -EAGAIN)
2536                 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2537         else if (wdata->result < 0)
2538                 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2539         else
2540                 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2541
2542         if (wdata->result != -EAGAIN)
2543                 mapping_set_error(inode->i_mapping, wdata->result);
2544         kref_put(&wdata->refcount, cifs_writedata_release);
2545 }
2546
2547 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2548 {
2549         struct cifs_writedata *wdata;
2550
2551         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2552         if (wdata != NULL) {
2553                 kref_init(&wdata->refcount);
2554                 INIT_LIST_HEAD(&wdata->list);
2555                 init_completion(&wdata->done);
2556                 INIT_WORK(&wdata->work, complete);
2557         }
2558         return wdata;
2559 }
2560
2561 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2562 {
2563         struct address_space *mapping = page->mapping;
2564         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2565         char *write_data;
2566         int rc = -EFAULT;
2567         int bytes_written = 0;
2568         struct inode *inode;
2569         struct cifsFileInfo *open_file;
2570
2571         if (!mapping || !mapping->host)
2572                 return -EFAULT;
2573
2574         inode = page->mapping->host;
2575
2576         offset += (loff_t)from;
2577         write_data = kmap(page);
2578         write_data += from;
2579
2580         if ((to > PAGE_SIZE) || (from > to)) {
2581                 kunmap(page);
2582                 return -EIO;
2583         }
2584
2585         /* racing with truncate? */
2586         if (offset > mapping->host->i_size) {
2587                 kunmap(page);
2588                 return 0; /* don't care */
2589         }
2590
2591         /* check to make sure that we are not extending the file */
2592         if (mapping->host->i_size - offset < (loff_t)to)
2593                 to = (unsigned)(mapping->host->i_size - offset);
2594
2595         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2596                                     &open_file);
2597         if (!rc) {
2598                 bytes_written = cifs_write(open_file, open_file->pid,
2599                                            write_data, to - from, &offset);
2600                 cifsFileInfo_put(open_file);
2601                 /* Does mm or vfs already set times? */
2602                 simple_inode_init_ts(inode);
2603                 if ((bytes_written > 0) && (offset))
2604                         rc = 0;
2605                 else if (bytes_written < 0)
2606                         rc = bytes_written;
2607                 else
2608                         rc = -EFAULT;
2609         } else {
2610                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2611                 if (!is_retryable_error(rc))
2612                         rc = -EIO;
2613         }
2614
2615         kunmap(page);
2616         return rc;
2617 }
2618
2619 /*
2620  * Extend the region to be written back to include subsequent contiguously
2621  * dirty pages if possible, but don't sleep while doing so.
2622  */
2623 static void cifs_extend_writeback(struct address_space *mapping,
2624                                   long *_count,
2625                                   loff_t start,
2626                                   int max_pages,
2627                                   size_t max_len,
2628                                   unsigned int *_len)
2629 {
2630         struct folio_batch batch;
2631         struct folio *folio;
2632         unsigned int psize, nr_pages;
2633         size_t len = *_len;
2634         pgoff_t index = (start + len) / PAGE_SIZE;
2635         bool stop = true;
2636         unsigned int i;
2637         XA_STATE(xas, &mapping->i_pages, index);
2638
2639         folio_batch_init(&batch);
2640
2641         do {
2642                 /* Firstly, we gather up a batch of contiguous dirty pages
2643                  * under the RCU read lock - but we can't clear the dirty flags
2644                  * there if any of those pages are mapped.
2645                  */
2646                 rcu_read_lock();
2647
2648                 xas_for_each(&xas, folio, ULONG_MAX) {
2649                         stop = true;
2650                         if (xas_retry(&xas, folio))
2651                                 continue;
2652                         if (xa_is_value(folio))
2653                                 break;
2654                         if (folio->index != index)
2655                                 break;
2656                         if (!folio_try_get_rcu(folio)) {
2657                                 xas_reset(&xas);
2658                                 continue;
2659                         }
2660                         nr_pages = folio_nr_pages(folio);
2661                         if (nr_pages > max_pages)
2662                                 break;
2663
2664                         /* Has the page moved or been split? */
2665                         if (unlikely(folio != xas_reload(&xas))) {
2666                                 folio_put(folio);
2667                                 break;
2668                         }
2669
2670                         if (!folio_trylock(folio)) {
2671                                 folio_put(folio);
2672                                 break;
2673                         }
2674                         if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
2675                                 folio_unlock(folio);
2676                                 folio_put(folio);
2677                                 break;
2678                         }
2679
2680                         max_pages -= nr_pages;
2681                         psize = folio_size(folio);
2682                         len += psize;
2683                         stop = false;
2684                         if (max_pages <= 0 || len >= max_len || *_count <= 0)
2685                                 stop = true;
2686
2687                         index += nr_pages;
2688                         if (!folio_batch_add(&batch, folio))
2689                                 break;
2690                         if (stop)
2691                                 break;
2692                 }
2693
2694                 if (!stop)
2695                         xas_pause(&xas);
2696                 rcu_read_unlock();
2697
2698                 /* Now, if we obtained any folios, we can clear their dirty
2699                  * flags and mark them as under writeback.
2700                  */
2701                 if (!folio_batch_count(&batch))
2702                         break;
2703
2704                 for (i = 0; i < folio_batch_count(&batch); i++) {
2705                         folio = batch.folios[i];
2706                         /* The folio should be locked, dirty and not undergoing
2707                          * writeback from the loop above.
2708                          */
2709                         if (!folio_clear_dirty_for_io(folio))
2710                                 WARN_ON(1);
2711                         folio_start_writeback(folio);
2712
2713                         *_count -= folio_nr_pages(folio);
2714                         folio_unlock(folio);
2715                 }
2716
2717                 folio_batch_release(&batch);
2718                 cond_resched();
2719         } while (!stop);
2720
2721         *_len = len;
2722 }
2723
2724 /*
2725  * Write back the locked page and any subsequent non-locked dirty pages.
2726  */
2727 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2728                                                  struct writeback_control *wbc,
2729                                                  struct folio *folio,
2730                                                  loff_t start, loff_t end)
2731 {
2732         struct inode *inode = mapping->host;
2733         struct TCP_Server_Info *server;
2734         struct cifs_writedata *wdata;
2735         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2736         struct cifs_credits credits_on_stack;
2737         struct cifs_credits *credits = &credits_on_stack;
2738         struct cifsFileInfo *cfile = NULL;
2739         unsigned int xid, wsize, len;
2740         loff_t i_size = i_size_read(inode);
2741         size_t max_len;
2742         long count = wbc->nr_to_write;
2743         int rc;
2744
2745         /* The folio should be locked, dirty and not undergoing writeback. */
2746         folio_start_writeback(folio);
2747
2748         count -= folio_nr_pages(folio);
2749         len = folio_size(folio);
2750
2751         xid = get_xid();
2752         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2753
2754         rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2755         if (rc) {
2756                 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2757                 goto err_xid;
2758         }
2759
2760         rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2761                                            &wsize, credits);
2762         if (rc != 0)
2763                 goto err_close;
2764
2765         wdata = cifs_writedata_alloc(cifs_writev_complete);
2766         if (!wdata) {
2767                 rc = -ENOMEM;
2768                 goto err_uncredit;
2769         }
2770
2771         wdata->sync_mode = wbc->sync_mode;
2772         wdata->offset = folio_pos(folio);
2773         wdata->pid = cfile->pid;
2774         wdata->credits = credits_on_stack;
2775         wdata->cfile = cfile;
2776         wdata->server = server;
2777         cfile = NULL;
2778
2779         /* Find all consecutive lockable dirty pages, stopping when we find a
2780          * page that is not immediately lockable, is not dirty or is missing,
2781          * or we reach the end of the range.
2782          */
2783         if (start < i_size) {
2784                 /* Trim the write to the EOF; the extra data is ignored.  Also
2785                  * put an upper limit on the size of a single write op.
2786                  */
2787                 max_len = wsize;
2788                 max_len = min_t(unsigned long long, max_len, end - start + 1);
2789                 max_len = min_t(unsigned long long, max_len, i_size - start);
2790
2791                 if (len < max_len) {
2792                         int max_pages = INT_MAX;
2793
2794 #ifdef CONFIG_CIFS_SMB_DIRECT
2795                         if (server->smbd_conn)
2796                                 max_pages = server->smbd_conn->max_frmr_depth;
2797 #endif
2798                         max_pages -= folio_nr_pages(folio);
2799
2800                         if (max_pages > 0)
2801                                 cifs_extend_writeback(mapping, &count, start,
2802                                                       max_pages, max_len, &len);
2803                 }
2804                 len = min_t(loff_t, len, max_len);
2805         }
2806
2807         wdata->bytes = len;
2808
2809         /* We now have a contiguous set of dirty pages, each with writeback
2810          * set; the first page is still locked at this point, but all the rest
2811          * have been unlocked.
2812          */
2813         folio_unlock(folio);
2814
2815         if (start < i_size) {
2816                 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2817                                 start, len);
2818
2819                 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2820                 if (rc)
2821                         goto err_wdata;
2822
2823                 if (wdata->cfile->invalidHandle)
2824                         rc = -EAGAIN;
2825                 else
2826                         rc = wdata->server->ops->async_writev(wdata,
2827                                                               cifs_writedata_release);
2828                 if (rc >= 0) {
2829                         kref_put(&wdata->refcount, cifs_writedata_release);
2830                         goto err_close;
2831                 }
2832         } else {
2833                 /* The dirty region was entirely beyond the EOF. */
2834                 cifs_pages_written_back(inode, start, len);
2835                 rc = 0;
2836         }
2837
2838 err_wdata:
2839         kref_put(&wdata->refcount, cifs_writedata_release);
2840 err_uncredit:
2841         add_credits_and_wake_if(server, credits, 0);
2842 err_close:
2843         if (cfile)
2844                 cifsFileInfo_put(cfile);
2845 err_xid:
2846         free_xid(xid);
2847         if (rc == 0) {
2848                 wbc->nr_to_write = count;
2849                 rc = len;
2850         } else if (is_retryable_error(rc)) {
2851                 cifs_pages_write_redirty(inode, start, len);
2852         } else {
2853                 cifs_pages_write_failed(inode, start, len);
2854                 mapping_set_error(mapping, rc);
2855         }
2856         /* Indication to update ctime and mtime as close is deferred */
2857         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2858         return rc;
2859 }
2860
2861 /*
2862  * Write a region of pages back to the server
2863  */
2864 static int cifs_writepages_region(struct address_space *mapping,
2865                                   struct writeback_control *wbc,
2866                                   loff_t start, loff_t end, loff_t *_next)
2867 {
2868         struct folio_batch fbatch;
2869         int skips = 0;
2870
2871         folio_batch_init(&fbatch);
2872         do {
2873                 int nr;
2874                 pgoff_t index = start / PAGE_SIZE;
2875
2876                 nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
2877                                             PAGECACHE_TAG_DIRTY, &fbatch);
2878                 if (!nr)
2879                         break;
2880
2881                 for (int i = 0; i < nr; i++) {
2882                         ssize_t ret;
2883                         struct folio *folio = fbatch.folios[i];
2884
2885 redo_folio:
2886                         start = folio_pos(folio); /* May regress with THPs */
2887
2888                         /* At this point we hold neither the i_pages lock nor the
2889                          * page lock: the page may be truncated or invalidated
2890                          * (changing page->mapping to NULL), or even swizzled
2891                          * back from swapper_space to tmpfs file mapping
2892                          */
2893                         if (wbc->sync_mode != WB_SYNC_NONE) {
2894                                 ret = folio_lock_killable(folio);
2895                                 if (ret < 0)
2896                                         goto write_error;
2897                         } else {
2898                                 if (!folio_trylock(folio))
2899                                         goto skip_write;
2900                         }
2901
2902                         if (folio->mapping != mapping ||
2903                             !folio_test_dirty(folio)) {
2904                                 start += folio_size(folio);
2905                                 folio_unlock(folio);
2906                                 continue;
2907                         }
2908
2909                         if (folio_test_writeback(folio) ||
2910                             folio_test_fscache(folio)) {
2911                                 folio_unlock(folio);
2912                                 if (wbc->sync_mode == WB_SYNC_NONE)
2913                                         goto skip_write;
2914
2915                                 folio_wait_writeback(folio);
2916 #ifdef CONFIG_CIFS_FSCACHE
2917                                 folio_wait_fscache(folio);
2918 #endif
2919                                 goto redo_folio;
2920                         }
2921
2922                         if (!folio_clear_dirty_for_io(folio))
2923                                 /* We hold the page lock - it should've been dirty. */
2924                                 WARN_ON(1);
2925
2926                         ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
2927                         if (ret < 0)
2928                                 goto write_error;
2929
2930                         start += ret;
2931                         continue;
2932
2933 write_error:
2934                         folio_batch_release(&fbatch);
2935                         *_next = start;
2936                         return ret;
2937
2938 skip_write:
2939                         /*
2940                          * Too many skipped writes, or need to reschedule?
2941                          * Treat it as a write error without an error code.
2942                          */
2943                         if (skips >= 5 || need_resched()) {
2944                                 ret = 0;
2945                                 goto write_error;
2946                         }
2947
2948                         /* Otherwise, just skip that folio and go on to the next */
2949                         skips++;
2950                         start += folio_size(folio);
2951                         continue;
2952                 }
2953
2954                 folio_batch_release(&fbatch);
2955                 cond_resched();
2956         } while (wbc->nr_to_write > 0);
2957
2958         *_next = start;
2959         return 0;
2960 }
2961
2962 /*
2963  * Write some of the pending data back to the server
2964  */
2965 static int cifs_writepages(struct address_space *mapping,
2966                            struct writeback_control *wbc)
2967 {
2968         loff_t start, next;
2969         int ret;
2970
2971         /* We have to be careful as we can end up racing with setattr()
2972          * truncating the pagecache since the caller doesn't take a lock here
2973          * to prevent it.
2974          */
2975
2976         if (wbc->range_cyclic) {
2977                 start = mapping->writeback_index * PAGE_SIZE;
2978                 ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
2979                 if (ret == 0) {
2980                         mapping->writeback_index = next / PAGE_SIZE;
2981                         if (start > 0 && wbc->nr_to_write > 0) {
2982                                 ret = cifs_writepages_region(mapping, wbc, 0,
2983                                                              start, &next);
2984                                 if (ret == 0)
2985                                         mapping->writeback_index =
2986                                                 next / PAGE_SIZE;
2987                         }
2988                 }
2989         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
2990                 ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
2991                 if (wbc->nr_to_write > 0 && ret == 0)
2992                         mapping->writeback_index = next / PAGE_SIZE;
2993         } else {
2994                 ret = cifs_writepages_region(mapping, wbc,
2995                                              wbc->range_start, wbc->range_end, &next);
2996         }
2997
2998         return ret;
2999 }
3000
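/*
 * Write a single locked page to the server via cifs_partialpagewrite(),
 * retrying indefinitely on -EAGAIN for WB_SYNC_ALL and redirtying the page
 * on other retryable errors.
 */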
3001 static int
3002 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3003 {
3004         int rc;
3005         unsigned int xid;
3006
3007         xid = get_xid();
3008 /* BB add check for wbc flags */
3009         get_page(page);
3010         if (!PageUptodate(page))
3011                 cifs_dbg(FYI, "ppw - page not up to date\n");
3012
3013         /*
3014          * Set the "writeback" flag, and clear "dirty" in the page cache.
3015          *
3016          * A writepage() implementation always needs to do either this,
3017          * or re-dirty the page with "redirty_page_for_writepage()" in
3018          * the case of a failure.
3019          *
3020          * Just unlocking the page would leave the page cache tag bits
3021          * out of sync with the true state of the page.
3022          */
3023         set_page_writeback(page);
3024 retry_write:
3025         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3026         if (is_retryable_error(rc)) {
3027                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3028                         goto retry_write;
3029                 redirty_page_for_writepage(wbc, page);
3030         } else if (rc != 0) {
3031                 SetPageError(page);
3032                 mapping_set_error(page->mapping, rc);
3033         } else {
3034                 SetPageUptodate(page);
3035         }
3036         end_page_writeback(page);
3037         put_page(page);
3038         free_xid(xid);
3039         return rc;
3040 }
3041
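/*
 * Conclude a buffered write.  If the page could not be brought uptodate,
 * push the copied bytes straight to the server through the cached file
 * handle; otherwise just dirty the page.  Extends i_size when the write
 * lands beyond the current EOF.
 */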
3042 static int cifs_write_end(struct file *file, struct address_space *mapping,
3043                         loff_t pos, unsigned len, unsigned copied,
3044                         struct page *page, void *fsdata)
3045 {
3046         int rc;
3047         struct inode *inode = mapping->host;
3048         struct cifsFileInfo *cfile = file->private_data;
3049         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3050         struct folio *folio = page_folio(page);
3051         __u32 pid;
3052
3053         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3054                 pid = cfile->pid;
3055         else
3056                 pid = current->tgid;
3057
3058         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3059                  page, pos, copied);
3060
3061         if (folio_test_checked(folio)) {
3062                 if (copied == len)
3063                         folio_mark_uptodate(folio);
3064                 folio_clear_checked(folio);
3065         } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3066                 folio_mark_uptodate(folio);
3067
3068         if (!folio_test_uptodate(folio)) {
3069                 char *page_data;
3070                 unsigned offset = pos & (PAGE_SIZE - 1);
3071                 unsigned int xid;
3072
3073                 xid = get_xid();
3074                 /* This is probably better than calling partialpage_write
3075                    directly, since here the file handle is already known and
3076                    we might as well use it */
3077                 /* BB check if anything else missing out of ppw
3078                    such as updating last write time */
3079                 page_data = kmap(page);
3080                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3081                 /* if (rc < 0) should we set writebehind rc? */
3082                 kunmap(page);
3083
3084                 free_xid(xid);
3085         } else {
3086                 rc = copied;
3087                 pos += copied;
3088                 set_page_dirty(page);
3089         }
3090
3091         if (rc > 0) {
3092                 spin_lock(&inode->i_lock);
3093                 if (pos > inode->i_size) {
3094                         i_size_write(inode, pos);
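                        /* i_blocks counts 512-byte sectors, rounded up */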
3095                         inode->i_blocks = (512 - 1 + pos) >> 9;
3096                 }
3097                 spin_unlock(&inode->i_lock);
3098         }
3099
3100         unlock_page(page);
3101         put_page(page);
3102         /* Indication to update ctime and mtime as close is deferred */
3103         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3104
3105         return rc;
3106 }
3107
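/*
 * Strict-cache fsync: write back and wait on the given range, zap the
 * pagecache if we no longer hold a read lease/oplock, then ask the server
 * to flush the handle unless CIFS_MOUNT_NOSSYNC is set.
 */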
3108 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3109                       int datasync)
3110 {
3111         unsigned int xid;
3112         int rc = 0;
3113         struct cifs_tcon *tcon;
3114         struct TCP_Server_Info *server;
3115         struct cifsFileInfo *smbfile = file->private_data;
3116         struct inode *inode = file_inode(file);
3117         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3118
3119         rc = file_write_and_wait_range(file, start, end);
3120         if (rc) {
3121                 trace_cifs_fsync_err(inode->i_ino, rc);
3122                 return rc;
3123         }
3124
3125         xid = get_xid();
3126
3127         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3128                  file, datasync);
3129
3130         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3131                 rc = cifs_zap_mapping(inode);
3132                 if (rc) {
3133                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3134                         rc = 0; /* don't care about it in fsync */
3135                 }
3136         }
3137
3138         tcon = tlink_tcon(smbfile->tlink);
3139         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3140                 server = tcon->ses->server;
3141                 if (server->ops->flush == NULL) {
3142                         rc = -ENOSYS;
3143                         goto strict_fsync_exit;
3144                 }
3145
3146                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3147                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3148                         if (smbfile) {
3149                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3150                                 cifsFileInfo_put(smbfile);
3151                         } else
3152                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3153                 } else
3154                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3155         }
3156
3157 strict_fsync_exit:
3158         free_xid(xid);
3159         return rc;
3160 }
3161
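/*
 * Like cifs_strict_fsync() but without invalidating the pagecache: write
 * back the range and, unless CIFS_MOUNT_NOSSYNC is set, ask the server to
 * flush the handle.
 */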
3162 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3163 {
3164         unsigned int xid;
3165         int rc = 0;
3166         struct cifs_tcon *tcon;
3167         struct TCP_Server_Info *server;
3168         struct cifsFileInfo *smbfile = file->private_data;
3169         struct inode *inode = file_inode(file);
3170         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3171
3172         rc = file_write_and_wait_range(file, start, end);
3173         if (rc) {
3174                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3175                 return rc;
3176         }
3177
3178         xid = get_xid();
3179
3180         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3181                  file, datasync);
3182
3183         tcon = tlink_tcon(smbfile->tlink);
3184         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3185                 server = tcon->ses->server;
3186                 if (server->ops->flush == NULL) {
3187                         rc = -ENOSYS;
3188                         goto fsync_exit;
3189                 }
3190
3191                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3192                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3193                         if (smbfile) {
3194                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3195                                 cifsFileInfo_put(smbfile);
3196                         } else
3197                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3198                 } else
3199                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3200         }
3201
3202 fsync_exit:
3203         free_xid(xid);
3204         return rc;
3205 }
3206
3207 /*
3208  * As the file closes, flush all cached write data for this inode and
3209  * check for write-behind errors.
3210  */
3211 int cifs_flush(struct file *file, fl_owner_t id)
3212 {
3213         struct inode *inode = file_inode(file);
3214         int rc = 0;
3215
3216         if (file->f_mode & FMODE_WRITE)
3217                 rc = filemap_write_and_wait(inode->i_mapping);
3218
3219         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3220         if (rc) {
3221                 /* get more nuanced writeback errors */
3222                 rc = filemap_check_wb_err(file->f_mapping, 0);
3223                 trace_cifs_flush_err(inode->i_ino, rc);
3224         }
3225         return rc;
3226 }
3227
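/*
 * Final release of an uncached wdata: drop its reference on the AIO
 * context before releasing the wdata itself.
 */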
3228 static void
3229 cifs_uncached_writedata_release(struct kref *refcount)
3230 {
3231         struct cifs_writedata *wdata = container_of(refcount,
3232                                         struct cifs_writedata, refcount);
3233
3234         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3235         cifs_writedata_release(refcount);
3236 }
3237
3238 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3239
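/*
 * Completion worker for uncached writes: advance the cached EOF, wake any
 * waiter and let collect_uncached_write_data() reap the result.
 */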
3240 static void
3241 cifs_uncached_writev_complete(struct work_struct *work)
3242 {
3243         struct cifs_writedata *wdata = container_of(work,
3244                                         struct cifs_writedata, work);
3245         struct inode *inode = d_inode(wdata->cfile->dentry);
3246         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3247
3248         spin_lock(&inode->i_lock);
3249         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3250         if (cifsi->netfs.remote_i_size > inode->i_size)
3251                 i_size_write(inode, cifsi->netfs.remote_i_size);
3252         spin_unlock(&inode->i_lock);
3253
3254         complete(&wdata->done);
3255         collect_uncached_write_data(wdata->ctx);
3256         /* the below call can possibly free the last ref to aio ctx */
3257         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3258 }
3259
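/*
 * Resend a wdata in its entirety after the server rejected the original:
 * reopen the handle if it was invalidated, wait until enough credits are
 * available for the full I/O size, then reissue the async write.
 */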
3260 static int
3261 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3262         struct cifs_aio_ctx *ctx)
3263 {
3264         unsigned int wsize;
3265         struct cifs_credits credits;
3266         int rc;
3267         struct TCP_Server_Info *server = wdata->server;
3268
3269         do {
3270                 if (wdata->cfile->invalidHandle) {
3271                         rc = cifs_reopen_file(wdata->cfile, false);
3272                         if (rc == -EAGAIN)
3273                                 continue;
3274                         else if (rc)
3275                                 break;
3276                 }
3277
3278
3279                 /*
3280                  * Wait for credits to resend this wdata.
3281                  * Note: we attempt to resend the whole wdata rather than
3282                  * resending it in segments.
3283                  */
3284                 do {
3285                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3286                                                 &wsize, &credits);
3287                         if (rc)
3288                                 goto fail;
3289
3290                         if (wsize < wdata->bytes) {
3291                                 add_credits_and_wake_if(server, &credits, 0);
3292                                 msleep(1000);
3293                         }
3294                 } while (wsize < wdata->bytes);
3295                 wdata->credits = credits;
3296
3297                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3298
3299                 if (!rc) {
3300                         if (wdata->cfile->invalidHandle)
3301                                 rc = -EAGAIN;
3302                         else {
3303                                 wdata->replay = true;
3304 #ifdef CONFIG_CIFS_SMB_DIRECT
3305                                 if (wdata->mr) {
3306                                         wdata->mr->need_invalidate = true;
3307                                         smbd_deregister_mr(wdata->mr);
3308                                         wdata->mr = NULL;
3309                                 }
3310 #endif
3311                                 rc = server->ops->async_writev(wdata,
3312                                         cifs_uncached_writedata_release);
3313                         }
3314                 }
3315
3316                 /* If the write was successfully sent, we are done */
3317                 if (!rc) {
3318                         list_add_tail(&wdata->list, wdata_list);
3319                         return 0;
3320                 }
3321
3322                 /* Roll back credits and retry if needed */
3323                 add_credits_and_wake_if(server, &wdata->credits, 0);
3324         } while (rc == -EAGAIN);
3325
3326 fail:
3327         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3328         return rc;
3329 }
3330
3331 /*
3332  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3333  * size and maximum number of segments.
3334  */
3335 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3336                                      size_t max_segs, unsigned int *_nsegs)
3337 {
3338         const struct bio_vec *bvecs = iter->bvec;
3339         unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3340         size_t len, span = 0, n = iter->count;
3341         size_t skip = iter->iov_offset;
3342
3343         if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3344                 return 0;
3345
3346         while (n && ix < nbv && skip) {
3347                 len = bvecs[ix].bv_len;
3348                 if (skip < len)
3349                         break;
3350                 skip -= len;
3351                 n -= len;
3352                 ix++;
3353         }
3354
3355         while (n && ix < nbv) {
3356                 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3357                 span += len;
3358                 max_size -= len;
3359                 nsegs++;
3360                 ix++;
3361                 if (max_size == 0 || nsegs >= max_segs)
3362                         break;
3363                 skip = 0;
3364                 n -= len;
3365         }
3366
3367         *_nsegs = nsegs;
3368         return span;
3369 }
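/*
 * For example (hypothetical values): three 4KiB bvecs with max_size of
 * 6KiB and max_segs of 2 yield a span of 6144 bytes over 2 segments --
 * all of the first bvec plus half of the second.
 */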
3370
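/*
 * Split an uncached write into wsize-limited (and, under smbdirect,
 * segment-limited) chunks and issue an async write for each, queuing the
 * resulting wdata structures on @wdata_list for the collector.
 */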
3371 static int
3372 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3373                      struct cifsFileInfo *open_file,
3374                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3375                      struct cifs_aio_ctx *ctx)
3376 {
3377         int rc = 0;
3378         size_t cur_len, max_len;
3379         struct cifs_writedata *wdata;
3380         pid_t pid;
3381         struct TCP_Server_Info *server;
3382         unsigned int xid, max_segs = INT_MAX;
3383
3384         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3385                 pid = open_file->pid;
3386         else
3387                 pid = current->tgid;
3388
3389         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3390         xid = get_xid();
3391
3392 #ifdef CONFIG_CIFS_SMB_DIRECT
3393         if (server->smbd_conn)
3394                 max_segs = server->smbd_conn->max_frmr_depth;
3395 #endif
3396
3397         do {
3398                 struct cifs_credits credits_on_stack;
3399                 struct cifs_credits *credits = &credits_on_stack;
3400                 unsigned int wsize, nsegs = 0;
3401
3402                 if (signal_pending(current)) {
3403                         rc = -EINTR;
3404                         break;
3405                 }
3406
3407                 if (open_file->invalidHandle) {
3408                         rc = cifs_reopen_file(open_file, false);
3409                         if (rc == -EAGAIN)
3410                                 continue;
3411                         else if (rc)
3412                                 break;
3413                 }
3414
3415                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3416                                                    &wsize, credits);
3417                 if (rc)
3418                         break;
3419
3420                 max_len = min_t(size_t, len, wsize);
3421                 if (!max_len) {
3422                         rc = -EAGAIN;
3423                         add_credits_and_wake_if(server, credits, 0);
3424                         break;
3425                 }
3426
3427                 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3428                 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3429                          cur_len, max_len, nsegs, from->nr_segs, max_segs);
3430                 if (cur_len == 0) {
3431                         rc = -EIO;
3432                         add_credits_and_wake_if(server, credits, 0);
3433                         break;
3434                 }
3435
3436                 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3437                 if (!wdata) {
3438                         rc = -ENOMEM;
3439                         add_credits_and_wake_if(server, credits, 0);
3440                         break;
3441                 }
3442
3443                 wdata->sync_mode = WB_SYNC_ALL;
3444                 wdata->offset   = (__u64)fpos;
3445                 wdata->cfile    = cifsFileInfo_get(open_file);
3446                 wdata->server   = server;
3447                 wdata->pid      = pid;
3448                 wdata->bytes    = cur_len;
3449                 wdata->credits  = credits_on_stack;
3450                 wdata->iter     = *from;
3451                 wdata->ctx      = ctx;
3452                 kref_get(&ctx->refcount);
3453
3454                 iov_iter_truncate(&wdata->iter, cur_len);
3455
3456                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3457
3458                 if (!rc) {
3459                         if (wdata->cfile->invalidHandle)
3460                                 rc = -EAGAIN;
3461                         else
3462                                 rc = server->ops->async_writev(wdata,
3463                                         cifs_uncached_writedata_release);
3464                 }
3465
3466                 if (rc) {
3467                         add_credits_and_wake_if(server, &wdata->credits, 0);
3468                         kref_put(&wdata->refcount,
3469                                  cifs_uncached_writedata_release);
3470                         if (rc == -EAGAIN)
3471                                 continue;
3472                         break;
3473                 }
3474
3475                 list_add_tail(&wdata->list, wdata_list);
3476                 iov_iter_advance(from, cur_len);
3477                 fpos += cur_len;
3478                 len -= cur_len;
3479         } while (len > 0);
3480
3481         free_xid(xid);
3482         return rc;
3483 }
3484
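/*
 * Reap the async writes attached to @ctx in order of increasing offset,
 * resending any chunk that failed with -EAGAIN, then invalidate the
 * pagecache and post the overall result to the iocb or the ctx waiter.
 */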
3485 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3486 {
3487         struct cifs_writedata *wdata, *tmp;
3488         struct cifs_tcon *tcon;
3489         struct cifs_sb_info *cifs_sb;
3490         struct dentry *dentry = ctx->cfile->dentry;
3491         ssize_t rc;
3492
3493         tcon = tlink_tcon(ctx->cfile->tlink);
3494         cifs_sb = CIFS_SB(dentry->d_sb);
3495
3496         mutex_lock(&ctx->aio_mutex);
3497
3498         if (list_empty(&ctx->list)) {
3499                 mutex_unlock(&ctx->aio_mutex);
3500                 return;
3501         }
3502
3503         rc = ctx->rc;
3504         /*
3505          * Wait for and collect replies for any successful sends in order of
3506          * increasing offset. Once an error is hit, then return without waiting
3507          * for any more replies.
3508          */
3509 restart_loop:
3510         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3511                 if (!rc) {
3512                         if (!try_wait_for_completion(&wdata->done)) {
3513                                 mutex_unlock(&ctx->aio_mutex);
3514                                 return;
3515                         }
3516
3517                         if (wdata->result)
3518                                 rc = wdata->result;
3519                         else
3520                                 ctx->total_len += wdata->bytes;
3521
3522                         /* resend call if it's a retryable error */
3523                         if (rc == -EAGAIN) {
3524                                 struct list_head tmp_list;
3525                                 struct iov_iter tmp_from = ctx->iter;
3526
3527                                 INIT_LIST_HEAD(&tmp_list);
3528                                 list_del_init(&wdata->list);
3529
3530                                 if (ctx->direct_io)
3531                                         rc = cifs_resend_wdata(
3532                                                 wdata, &tmp_list, ctx);
3533                                 else {
3534                                         iov_iter_advance(&tmp_from,
3535                                                  wdata->offset - ctx->pos);
3536
3537                                         rc = cifs_write_from_iter(wdata->offset,
3538                                                 wdata->bytes, &tmp_from,
3539                                                 ctx->cfile, cifs_sb, &tmp_list,
3540                                                 ctx);
3541
3542                                         kref_put(&wdata->refcount,
3543                                                 cifs_uncached_writedata_release);
3544                                 }
3545
3546                                 list_splice(&tmp_list, &ctx->list);
3547                                 goto restart_loop;
3548                         }
3549                 }
3550                 list_del_init(&wdata->list);
3551                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3552         }
3553
3554         cifs_stats_bytes_written(tcon, ctx->total_len);
3555         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3556
3557         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3558
3559         mutex_unlock(&ctx->aio_mutex);
3560
3561         if (ctx->iocb && ctx->iocb->ki_complete)
3562                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3563         else
3564                 complete(&ctx->done);
3565 }
3566
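/*
 * Common implementation of uncached and direct writes: pin or copy the
 * source iterator so async completion can still reach the data, fire off
 * the chunked writes, then either wait for the result (sync) or return
 * -EIOCBQUEUED (async).
 */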
3567 static ssize_t __cifs_writev(
3568         struct kiocb *iocb, struct iov_iter *from, bool direct)
3569 {
3570         struct file *file = iocb->ki_filp;
3571         ssize_t total_written = 0;
3572         struct cifsFileInfo *cfile;
3573         struct cifs_tcon *tcon;
3574         struct cifs_sb_info *cifs_sb;
3575         struct cifs_aio_ctx *ctx;
3576         int rc;
3577
3578         rc = generic_write_checks(iocb, from);
3579         if (rc <= 0)
3580                 return rc;
3581
3582         cifs_sb = CIFS_FILE_SB(file);
3583         cfile = file->private_data;
3584         tcon = tlink_tcon(cfile->tlink);
3585
3586         if (!tcon->ses->server->ops->async_writev)
3587                 return -ENOSYS;
3588
3589         ctx = cifs_aio_ctx_alloc();
3590         if (!ctx)
3591                 return -ENOMEM;
3592
3593         ctx->cfile = cifsFileInfo_get(cfile);
3594
3595         if (!is_sync_kiocb(iocb))
3596                 ctx->iocb = iocb;
3597
3598         ctx->pos = iocb->ki_pos;
3599         ctx->direct_io = direct;
3600         ctx->nr_pinned_pages = 0;
3601
3602         if (user_backed_iter(from)) {
3603                 /*
3604                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3605                  * they contain references to the calling process's virtual
3606                  * memory layout which won't be available in an async worker
3607                  * thread.  This also takes a pin on every folio involved.
3608                  */
3609                 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3610                                              &ctx->iter, 0);
3611                 if (rc < 0) {
3612                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3613                         return rc;
3614                 }
3615
3616                 ctx->nr_pinned_pages = rc;
3617                 ctx->bv = (void *)ctx->iter.bvec;
3618                 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3619         } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3620                    !is_sync_kiocb(iocb)) {
3621                 /*
3622                  * If the op is asynchronous, we need to copy the list attached
3623                  * to a BVEC/KVEC-type iterator, but we assume that the storage
3624                  * will be pinned by the caller; in any case, we may or may not
3625                  * be able to pin the pages, so we don't try.
3626                  */
3627                 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3628                 if (!ctx->bv) {
3629                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3630                         return -ENOMEM;
3631                 }
3632         } else {
3633                 /*
3634                  * Otherwise, we just pass the iterator down as-is and rely on
3635                  * the caller to make sure the pages referred to by the
3636                  * iterator don't evaporate.
3637                  */
3638                 ctx->iter = *from;
3639         }
3640
3641         ctx->len = iov_iter_count(&ctx->iter);
3642
3643         /* grab the mutex because the write completion handlers can access ctx */
3644         mutex_lock(&ctx->aio_mutex);
3645
3646         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3647                                   cfile, cifs_sb, &ctx->list, ctx);
3648
3649         /*
3650          * If at least one write was successfully sent, then discard any rc
3651          * value from the later writes. If the remaining writes succeed, then
3652          * we'll end up returning whatever was written. If they fail, then
3653          * we'll get a new rc value from that.
3654          */
3655         if (!list_empty(&ctx->list))
3656                 rc = 0;
3657
3658         mutex_unlock(&ctx->aio_mutex);
3659
3660         if (rc) {
3661                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3662                 return rc;
3663         }
3664
3665         if (!is_sync_kiocb(iocb)) {
3666                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3667                 return -EIOCBQUEUED;
3668         }
3669
3670         rc = wait_for_completion_killable(&ctx->done);
3671         if (rc) {
3672                 mutex_lock(&ctx->aio_mutex);
3673                 ctx->rc = rc = -EINTR;
3674                 total_written = ctx->total_len;
3675                 mutex_unlock(&ctx->aio_mutex);
3676         } else {
3677                 rc = ctx->rc;
3678                 total_written = ctx->total_len;
3679         }
3680
3681         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3682
3683         if (unlikely(!total_written))
3684                 return rc;
3685
3686         iocb->ki_pos += total_written;
3687         return total_written;
3688 }
3689
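/* O_DIRECT write: revalidate the mapping, then send the data uncached */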
3690 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3691 {
3692         struct file *file = iocb->ki_filp;
3693
3694         cifs_revalidate_mapping(file->f_inode);
3695         return __cifs_writev(iocb, from, true);
3696 }
3697
3698 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3699 {
3700         return __cifs_writev(iocb, from, false);
3701 }
3702
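/*
 * Write through the pagecache with the inode locked, holding lock_sem
 * shared so that no conflicting brlock can appear while we check for one;
 * fails with -EACCES if a mandatory lock conflicts with the write.
 */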
3703 static ssize_t
3704 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3705 {
3706         struct file *file = iocb->ki_filp;
3707         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3708         struct inode *inode = file->f_mapping->host;
3709         struct cifsInodeInfo *cinode = CIFS_I(inode);
3710         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3711         ssize_t rc;
3712
3713         inode_lock(inode);
3714         /*
3715          * We need to hold the sem to be sure nobody modifies lock list
3716          * with a brlock that prevents writing.
3717          */
3718         down_read(&cinode->lock_sem);
3719
3720         rc = generic_write_checks(iocb, from);
3721         if (rc <= 0)
3722                 goto out;
3723
3724         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3725                                      server->vals->exclusive_lock_type, 0,
3726                                      NULL, CIFS_WRITE_OP))
3727                 rc = __generic_file_write_iter(iocb, from);
3728         else
3729                 rc = -EACCES;
3730 out:
3731         up_read(&cinode->lock_sem);
3732         inode_unlock(inode);
3733
3734         if (rc > 0)
3735                 rc = generic_write_sync(iocb, rc);
3736         return rc;
3737 }
3738
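/*
 * Strict-cache write: go through the pagecache only while we hold a write
 * lease/oplock; otherwise write uncached and, if we still hold read
 * caching, zap the now-stale pagecache.
 */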
3739 ssize_t
3740 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3741 {
3742         struct inode *inode = file_inode(iocb->ki_filp);
3743         struct cifsInodeInfo *cinode = CIFS_I(inode);
3744         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3745         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3746                                                 iocb->ki_filp->private_data;
3747         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3748         ssize_t written;
3749
3750         written = cifs_get_writer(cinode);
3751         if (written)
3752                 return written;
3753
3754         if (CIFS_CACHE_WRITE(cinode)) {
3755                 if (cap_unix(tcon->ses) &&
3756                     (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3757                     ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3758                         written = generic_file_write_iter(iocb, from);
3759                         goto out;
3760                 }
3761                 written = cifs_writev(iocb, from);
3762                 goto out;
3763         }
3764         /*
3765          * For non-oplocked files in strict cache mode we need to write the data
3766          * to the server exactly from the pos to pos+len-1 rather than flush all
3767          * affected pages because it may cause an error with mandatory locks on
3768          * these pages but not on the region from pos to pos+len-1.
3769          */
3770         written = cifs_user_writev(iocb, from);
3771         if (CIFS_CACHE_READ(cinode)) {
3772                 /*
3773                  * We have read level caching and we have just sent a write
3774                  * request to the server thus making data in the cache stale.
3775                  * Zap the cache and set oplock/lease level to NONE to avoid
3776                  * reading stale data from the cache. All subsequent read
3777                  * operations will read new data from the server.
3778                  */
3779                 cifs_zap_mapping(inode);
3780                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3781                          inode);
3782                 cinode->oplock = 0;
3783         }
3784 out:
3785         cifs_put_writer(cinode);
3786         return written;
3787 }
3788
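/*
 * Allocate and initialise a readdata with a single reference and the
 * given completion worker.
 */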
3789 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3790 {
3791         struct cifs_readdata *rdata;
3792
3793         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3794         if (rdata) {
3795                 kref_init(&rdata->refcount);
3796                 INIT_LIST_HEAD(&rdata->list);
3797                 init_completion(&rdata->done);
3798                 INIT_WORK(&rdata->work, complete);
3799         }
3800
3801         return rdata;
3802 }
3803
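/*
 * Final release of a readdata: drop the AIO context reference, deregister
 * any smbdirect memory registration and put the file handle.
 */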
3804 void
3805 cifs_readdata_release(struct kref *refcount)
3806 {
3807         struct cifs_readdata *rdata = container_of(refcount,
3808                                         struct cifs_readdata, refcount);
3809
3810         if (rdata->ctx)
3811                 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3812 #ifdef CONFIG_CIFS_SMB_DIRECT
3813         if (rdata->mr) {
3814                 smbd_deregister_mr(rdata->mr);
3815                 rdata->mr = NULL;
3816         }
3817 #endif
3818         if (rdata->cfile)
3819                 cifsFileInfo_put(rdata->cfile);
3820
3821         kfree(rdata);
3822 }
3823
3824 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3825
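/*
 * Completion worker for uncached reads: wake any waiter and let
 * collect_uncached_read_data() reap the result.
 */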
3826 static void
3827 cifs_uncached_readv_complete(struct work_struct *work)
3828 {
3829         struct cifs_readdata *rdata = container_of(work,
3830                                                 struct cifs_readdata, work);
3831
3832         complete(&rdata->done);
3833         collect_uncached_read_data(rdata->ctx);
3834         /* the below call can possibly free the last ref to aio ctx */
3835         kref_put(&rdata->refcount, cifs_readdata_release);
3836 }
3837
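/*
 * Resend an rdata in its entirety, mirroring cifs_resend_wdata(): reopen
 * the handle if needed, wait for sufficient credits for the full size,
 * then reissue the async read.
 */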
3838 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3839                         struct list_head *rdata_list,
3840                         struct cifs_aio_ctx *ctx)
3841 {
3842         unsigned int rsize;
3843         struct cifs_credits credits;
3844         int rc;
3845         struct TCP_Server_Info *server;
3846
3847         /* XXX: should we pick a new channel here? */
3848         server = rdata->server;
3849
3850         do {
3851                 if (rdata->cfile->invalidHandle) {
3852                         rc = cifs_reopen_file(rdata->cfile, true);
3853                         if (rc == -EAGAIN)
3854                                 continue;
3855                         else if (rc)
3856                                 break;
3857                 }
3858
3859                 /*
3860                  * Wait for credits to resend this rdata.
3861                  * Note: we attempt to resend the whole rdata rather than
3862                  * resending it in segments.
3863                  */
3864                 do {
3865                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3866                                                 &rsize, &credits);
3867
3868                         if (rc)
3869                                 goto fail;
3870
3871                         if (rsize < rdata->bytes) {
3872                                 add_credits_and_wake_if(server, &credits, 0);
3873                                 msleep(1000);
3874                         }
3875                 } while (rsize < rdata->bytes);
3876                 rdata->credits = credits;
3877
3878                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3879                 if (!rc) {
3880                         if (rdata->cfile->invalidHandle)
3881                                 rc = -EAGAIN;
3882                         else {
3883 #ifdef CONFIG_CIFS_SMB_DIRECT
3884                                 if (rdata->mr) {
3885                                         rdata->mr->need_invalidate = true;
3886                                         smbd_deregister_mr(rdata->mr);
3887                                         rdata->mr = NULL;
3888                                 }
3889 #endif
3890                                 rc = server->ops->async_readv(rdata);
3891                         }
3892                 }
3893
3894                 /* If the read was successfully sent, we are done */
3895                 if (!rc) {
3896                         /* Add to aio pending list */
3897                         list_add_tail(&rdata->list, rdata_list);
3898                         return 0;
3899                 }
3900
3901                 /* Roll back credits and retry if needed */
3902                 add_credits_and_wake_if(server, &rdata->credits, 0);
3903         } while (rc == -EAGAIN);
3904
3905 fail:
3906         kref_put(&rdata->refcount, cifs_readdata_release);
3907         return rc;
3908 }
3909
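/*
 * Split an uncached read into rsize-limited (and, under smbdirect,
 * segment-limited) chunks and issue an async read for each, queuing the
 * rdata structures on @rdata_list for the collector.
 */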
3910 static int
3911 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
3912                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3913                      struct cifs_aio_ctx *ctx)
3914 {
3915         struct cifs_readdata *rdata;
3916         unsigned int rsize, nsegs, max_segs = INT_MAX;
3917         struct cifs_credits credits_on_stack;
3918         struct cifs_credits *credits = &credits_on_stack;
3919         size_t cur_len, max_len;
3920         int rc;
3921         pid_t pid;
3922         struct TCP_Server_Info *server;
3923
3924         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3925
3926 #ifdef CONFIG_CIFS_SMB_DIRECT
3927         if (server->smbd_conn)
3928                 max_segs = server->smbd_conn->max_frmr_depth;
3929 #endif
3930
3931         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3932                 pid = open_file->pid;
3933         else
3934                 pid = current->tgid;
3935
3936         do {
3937                 if (open_file->invalidHandle) {
3938                         rc = cifs_reopen_file(open_file, true);
3939                         if (rc == -EAGAIN)
3940                                 continue;
3941                         else if (rc)
3942                                 break;
3943                 }
3944
3945                 if (cifs_sb->ctx->rsize == 0)
3946                         cifs_sb->ctx->rsize =
3947                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
3948                                                              cifs_sb->ctx);
3949
3950                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3951                                                    &rsize, credits);
3952                 if (rc)
3953                         break;
3954
3955                 max_len = min_t(size_t, len, rsize);
3956
3957                 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
3958                                                  max_segs, &nsegs);
3959                 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3960                          cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
3961                 if (cur_len == 0) {
3962                         rc = -EIO;
3963                         add_credits_and_wake_if(server, credits, 0);
3964                         break;
3965                 }
3966
3967                 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
3968                 if (!rdata) {
3969                         add_credits_and_wake_if(server, credits, 0);
3970                         rc = -ENOMEM;
3971                         break;
3972                 }
3973
3974                 rdata->server   = server;
3975                 rdata->cfile    = cifsFileInfo_get(open_file);
3976                 rdata->offset   = fpos;
3977                 rdata->bytes    = cur_len;
3978                 rdata->pid      = pid;
3979                 rdata->credits  = credits_on_stack;
3980                 rdata->ctx      = ctx;
3981                 kref_get(&ctx->refcount);
3982
3983                 rdata->iter     = ctx->iter;
3984                 iov_iter_truncate(&rdata->iter, cur_len);
3985
3986                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3987
3988                 if (!rc) {
3989                         if (rdata->cfile->invalidHandle)
3990                                 rc = -EAGAIN;
3991                         else
3992                                 rc = server->ops->async_readv(rdata);
3993                 }
3994
3995                 if (rc) {
3996                         add_credits_and_wake_if(server, &rdata->credits, 0);
3997                         kref_put(&rdata->refcount, cifs_readdata_release);
3998                         if (rc == -EAGAIN)
3999                                 continue;
4000                         break;
4001                 }
4002
4003                 list_add_tail(&rdata->list, rdata_list);
4004                 iov_iter_advance(&ctx->iter, cur_len);
4005                 fpos += cur_len;
4006                 len -= cur_len;
4007         } while (len > 0);
4008
4009         return rc;
4010 }
4011
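/*
 * Reap the async reads attached to @ctx, resending any chunk that failed
 * with -EAGAIN and discarding whatever follows a short read, then post
 * the overall result to the iocb or the ctx waiter.
 */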
4012 static void
4013 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4014 {
4015         struct cifs_readdata *rdata, *tmp;
4016         struct cifs_sb_info *cifs_sb;
4017         int rc;
4018
4019         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4020
4021         mutex_lock(&ctx->aio_mutex);
4022
4023         if (list_empty(&ctx->list)) {
4024                 mutex_unlock(&ctx->aio_mutex);
4025                 return;
4026         }
4027
4028         rc = ctx->rc;
4029         /* the loop below should proceed in the order of increasing offsets */
4030 again:
4031         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4032                 if (!rc) {
4033                         if (!try_wait_for_completion(&rdata->done)) {
4034                                 mutex_unlock(&ctx->aio_mutex);
4035                                 return;
4036                         }
4037
4038                         if (rdata->result == -EAGAIN) {
4039                                 /* resend call if it's a retryable error */
4040                                 struct list_head tmp_list;
4041                                 unsigned int got_bytes = rdata->got_bytes;
4042
4043                                 list_del_init(&rdata->list);
4044                                 INIT_LIST_HEAD(&tmp_list);
4045
4046                                 if (ctx->direct_io) {
4047                                         /*
4048                                          * Re-use rdata as this is a
4049                                          * direct I/O
4050                                          */
4051                                         rc = cifs_resend_rdata(
4052                                                 rdata,
4053                                                 &tmp_list, ctx);
4054                                 } else {
4055                                         rc = cifs_send_async_read(
4056                                                 rdata->offset + got_bytes,
4057                                                 rdata->bytes - got_bytes,
4058                                                 rdata->cfile, cifs_sb,
4059                                                 &tmp_list, ctx);
4060
4061                                         kref_put(&rdata->refcount,
4062                                                 cifs_readdata_release);
4063                                 }
4064
4065                                 list_splice(&tmp_list, &ctx->list);
4066
4067                                 goto again;
4068                         } else if (rdata->result)
4069                                 rc = rdata->result;
4070
4071                         /* if there was a short read -- discard anything left */
4072                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4073                                 rc = -ENODATA;
4074
4075                         ctx->total_len += rdata->got_bytes;
4076                 }
4077                 list_del_init(&rdata->list);
4078                 kref_put(&rdata->refcount, cifs_readdata_release);
4079         }
4080
4081         /* mask nodata case */
4082         if (rc == -ENODATA)
4083                 rc = 0;
4084
4085         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4086
4087         mutex_unlock(&ctx->aio_mutex);
4088
4089         if (ctx->iocb && ctx->iocb->ki_complete)
4090                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4091         else
4092                 complete(&ctx->done);
4093 }
4094
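/*
 * Common implementation of uncached and direct reads, mirroring
 * __cifs_writev(): capture the destination iterator, flush the range
 * first for O_DIRECT, fire off the chunked reads, then wait or return
 * -EIOCBQUEUED.
 */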
4095 static ssize_t __cifs_readv(
4096         struct kiocb *iocb, struct iov_iter *to, bool direct)
4097 {
4098         size_t len;
4099         struct file *file = iocb->ki_filp;
4100         struct cifs_sb_info *cifs_sb;
4101         struct cifsFileInfo *cfile;
4102         struct cifs_tcon *tcon;
4103         ssize_t rc, total_read = 0;
4104         loff_t offset = iocb->ki_pos;
4105         struct cifs_aio_ctx *ctx;
4106
4107         len = iov_iter_count(to);
4108         if (!len)
4109                 return 0;
4110
4111         cifs_sb = CIFS_FILE_SB(file);
4112         cfile = file->private_data;
4113         tcon = tlink_tcon(cfile->tlink);
4114
4115         if (!tcon->ses->server->ops->async_readv)
4116                 return -ENOSYS;
4117
4118         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4119                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4120
4121         ctx = cifs_aio_ctx_alloc();
4122         if (!ctx)
4123                 return -ENOMEM;
4124
4125         ctx->pos        = offset;
4126         ctx->direct_io  = direct;
4127         ctx->len        = len;
4128         ctx->cfile      = cifsFileInfo_get(cfile);
4129         ctx->nr_pinned_pages = 0;
4130
4131         if (!is_sync_kiocb(iocb))
4132                 ctx->iocb = iocb;
4133
4134         if (user_backed_iter(to)) {
4135                 /*
4136                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4137                  * they contain references to the calling process's virtual
4138                  * memory layout which won't be available in an async worker
4139                  * thread.  This also takes a pin on every folio involved.
4140                  */
4141                 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4142                                              &ctx->iter, 0);
4143                 if (rc < 0) {
4144                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4145                         return rc;
4146                 }
4147
4148                 ctx->nr_pinned_pages = rc;
4149                 ctx->bv = (void *)ctx->iter.bvec;
4150                 ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4151                 ctx->should_dirty = true;
4152         } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4153                    !is_sync_kiocb(iocb)) {
4154                 /*
4155                  * If the op is asynchronous, we need to copy the list attached
4156                  * to a BVEC/KVEC-type iterator, but we assume that the storage
4157                  * will be retained by the caller; in any case, we may or may
4158                  * not be able to pin the pages, so we don't try.
4159                  */
4160                 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4161                 if (!ctx->bv) {
4162                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4163                         return -ENOMEM;
4164                 }
4165         } else {
4166                 /*
4167                  * Otherwise, we just pass the iterator down as-is and rely on
4168                  * the caller to make sure the pages referred to by the
4169                  * iterator don't evaporate.
4170                  */
4171                 ctx->iter = *to;
4172         }
4173
4174         if (direct) {
4175                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4176                                                   offset, offset + len - 1);
4177                 if (rc) {
4178                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4179                         return -EAGAIN;
4180                 }
4181         }
4182
4183         /* grab the lock here since read response handlers can access ctx */
4184         mutex_lock(&ctx->aio_mutex);
4185
4186         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4187
4188         /* if at least one read request was sent successfully, reset rc */
4189         if (!list_empty(&ctx->list))
4190                 rc = 0;
4191
4192         mutex_unlock(&ctx->aio_mutex);
4193
4194         if (rc) {
4195                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4196                 return rc;
4197         }
4198
4199         if (!is_sync_kiocb(iocb)) {
4200                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4201                 return -EIOCBQUEUED;
4202         }
4203
4204         rc = wait_for_completion_killable(&ctx->done);
4205         if (rc) {
4206                 mutex_lock(&ctx->aio_mutex);
4207                 ctx->rc = rc = -EINTR;
4208                 total_read = ctx->total_len;
4209                 mutex_unlock(&ctx->aio_mutex);
4210         } else {
4211                 rc = ctx->rc;
4212                 total_read = ctx->total_len;
4213         }
4214
4215         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4216
4217         if (total_read) {
4218                 iocb->ki_pos += total_read;
4219                 return total_read;
4220         }
4221         return rc;
4222 }
4223
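/*
 * cifs_direct_readv() serves O_DIRECT reads and cifs_user_readv() serves
 * uncached reads (e.g. on cache=none mounts, and as the fallback taken by
 * cifs_strict_readv() below); the only difference is that the direct
 * variant flushes and waits on the pagecache range first.
 */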
4224 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4225 {
4226         return __cifs_readv(iocb, to, true);
4227 }
4228
4229 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4230 {
4231         return __cifs_readv(iocb, to, false);
4232 }
4233
4234 ssize_t
4235 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4236 {
4237         struct inode *inode = file_inode(iocb->ki_filp);
4238         struct cifsInodeInfo *cinode = CIFS_I(inode);
4239         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4240         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4241                                                 iocb->ki_filp->private_data;
4242         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4243         int rc = -EACCES;
4244
4245         /*
4246          * In strict cache mode we need to read from the server every time
4247          * if we don't have a level II oplock, because the server can delay
4248          * the mtime change, so we cannot decide whether the inode needs to
4249          * be invalidated.  Reading pages can also fail if there are
4250          * mandatory locks on pages affected by this read but not on the
4251          * region from pos to pos+len-1.
4252          */
4253         if (!CIFS_CACHE_READ(cinode))
4254                 return cifs_user_readv(iocb, to);
4255
4256         if (cap_unix(tcon->ses) &&
4257             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4258             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4259                 return generic_file_read_iter(iocb, to);
4260
4261         /*
4262          * We need to hold the sem to be sure nobody modifies lock list
4263          * with a brlock that prevents reading.
4264          */
4265         down_read(&cinode->lock_sem);
4266         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4267                                      tcon->ses->server->vals->shared_lock_type,
4268                                      0, NULL, CIFS_READ_OP))
4269                 rc = generic_file_read_iter(iocb, to);
4270         up_read(&cinode->lock_sem);
4271         return rc;
4272 }
4273
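/*
 * Synchronous read helper, used by cifs_readpage_worker() below.  It
 * issues server->ops->sync_read() calls of at most rsize bytes each,
 * retrying on -EAGAIN (reopening an invalidated handle if needed), until
 * the requested length has been read or an error occurs.
 */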
4274 static ssize_t
4275 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4276 {
4277         int rc = -EACCES;
4278         unsigned int bytes_read = 0;
4279         unsigned int total_read;
4280         unsigned int current_read_size;
4281         unsigned int rsize;
4282         struct cifs_sb_info *cifs_sb;
4283         struct cifs_tcon *tcon;
4284         struct TCP_Server_Info *server;
4285         unsigned int xid;
4286         char *cur_offset;
4287         struct cifsFileInfo *open_file;
4288         struct cifs_io_parms io_parms = {0};
4289         int buf_type = CIFS_NO_BUFFER;
4290         __u32 pid;
4291
4292         xid = get_xid();
4293         cifs_sb = CIFS_FILE_SB(file);
4294
4295         /* FIXME: set up handlers for larger reads and/or convert to async */
4296         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4297
4298         if (file->private_data == NULL) {
4299                 rc = -EBADF;
4300                 free_xid(xid);
4301                 return rc;
4302         }
4303         open_file = file->private_data;
4304         tcon = tlink_tcon(open_file->tlink);
4305         server = cifs_pick_channel(tcon->ses);
4306
4307         if (!server->ops->sync_read) {
4308                 free_xid(xid);
4309                 return -ENOSYS;
4310         }
4311
4312         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4313                 pid = open_file->pid;
4314         else
4315                 pid = current->tgid;
4316
4317         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4318                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4319
4320         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4321              total_read += bytes_read, cur_offset += bytes_read) {
4322                 do {
4323                         current_read_size = min_t(uint, read_size - total_read,
4324                                                   rsize);
4325                         /*
4326                          * For Windows ME and 9x we do not want to request
4327                          * more than the server negotiated, since it will
4328                          * refuse the read otherwise.
4329                          */
4330                         if (!(tcon->ses->capabilities &
4331                                 tcon->ses->server->vals->cap_large_files)) {
4332                                 current_read_size = min_t(uint,
4333                                         current_read_size, CIFSMaxBufSize);
4334                         }
4335                         if (open_file->invalidHandle) {
4336                                 rc = cifs_reopen_file(open_file, true);
4337                                 if (rc != 0)
4338                                         break;
4339                         }
4340                         io_parms.pid = pid;
4341                         io_parms.tcon = tcon;
4342                         io_parms.offset = *offset;
4343                         io_parms.length = current_read_size;
4344                         io_parms.server = server;
4345                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4346                                                     &bytes_read, &cur_offset,
4347                                                     &buf_type);
4348                 } while (rc == -EAGAIN);
4349
4350                 if (rc || (bytes_read == 0)) {
4351                         if (total_read) {
4352                                 break;
4353                         } else {
4354                                 free_xid(xid);
4355                                 return rc;
4356                         }
4357                 } else {
4358                         cifs_stats_bytes_read(tcon, total_read);
4359                         *offset += bytes_read;
4360                 }
4361         }
4362         free_xid(xid);
4363         return total_read;
4364 }
4365
4366 /*
4367  * If the page is mmapped into a process's page tables, then we need to
4368  * make sure that it doesn't change while being written back.
4369  */
4370 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4371 {
4372         struct folio *folio = page_folio(vmf->page);
4373
4374         /* Wait for the folio to be written to the cache before we allow it to
4375          * be modified.  We then assume the entire folio will need writing back.
4376          */
4377 #ifdef CONFIG_CIFS_FSCACHE
4378         if (folio_test_fscache(folio) &&
4379             folio_wait_fscache_killable(folio) < 0)
4380                 return VM_FAULT_RETRY;
4381 #endif
4382
4383         folio_wait_writeback(folio);
4384
4385         if (folio_lock_killable(folio) < 0)
4386                 return VM_FAULT_RETRY;
4387         return VM_FAULT_LOCKED;
4388 }
4389
4390 static const struct vm_operations_struct cifs_file_vm_ops = {
4391         .fault = filemap_fault,
4392         .map_pages = filemap_map_pages,
4393         .page_mkwrite = cifs_page_mkwrite,
4394 };
4395
4396 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4397 {
4398         int xid, rc = 0;
4399         struct inode *inode = file_inode(file);
4400
4401         xid = get_xid();
4402
4403         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4404                 rc = cifs_zap_mapping(inode);
4405         if (!rc)
4406                 rc = generic_file_mmap(file, vma);
4407         if (!rc)
4408                 vma->vm_ops = &cifs_file_vm_ops;
4409
4410         free_xid(xid);
4411         return rc;
4412 }
4413
4414 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4415 {
4416         int rc, xid;
4417
4418         xid = get_xid();
4419
4420         rc = cifs_revalidate_file(file);
4421         if (rc)
4422                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4423                          rc);
4424         if (!rc)
4425                 rc = generic_file_mmap(file, vma);
4426         if (!rc)
4427                 vma->vm_ops = &cifs_file_vm_ops;
4428
4429         free_xid(xid);
4430         return rc;
4431 }
4432
4433 /*
4434  * Unlock a bunch of folios in the pagecache.
4435  */
4436 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4437 {
4438         struct folio *folio;
4439         XA_STATE(xas, &mapping->i_pages, first);
4440
4441         rcu_read_lock();
4442         xas_for_each(&xas, folio, last) {
4443                 folio_unlock(folio);
4444         }
4445         rcu_read_unlock();
4446 }
4447
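/*
 * Completion work for a readahead request: on success, hand the data to
 * fscache; zero any tail the server did not return; then mark the folios
 * uptodate (if the read was good) and unlock them.
 */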
4448 static void cifs_readahead_complete(struct work_struct *work)
4449 {
4450         struct cifs_readdata *rdata = container_of(work,
4451                                                    struct cifs_readdata, work);
4452         struct folio *folio;
4453         pgoff_t last;
4454         bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4455
4456         XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4457
4458         if (good)
4459                 cifs_readahead_to_fscache(rdata->mapping->host,
4460                                           rdata->offset, rdata->bytes);
4461
4462         if (iov_iter_count(&rdata->iter) > 0)
4463                 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4464
4465         last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4466
4467         rcu_read_lock();
4468         xas_for_each(&xas, folio, last) {
4469                 if (good) {
4470                         flush_dcache_folio(folio);
4471                         folio_mark_uptodate(folio);
4472                 }
4473                 folio_unlock(folio);
4474         }
4475         rcu_read_unlock();
4476
4477         kref_put(&rdata->refcount, cifs_readdata_release);
4478 }
4479
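/*
 * Fill a readahead window.  Roughly, assuming rsize and the cache state
 * stay put:
 *
 *      while (ra_pages) {
 *              if the next folio is resident in fscache, read it from
 *                      there and loop;
 *              otherwise batch up to rsize bytes of folios into a
 *                      cifs_readdata and fire server->ops->async_readv();
 *      }
 *
 * cifs_readahead_complete() above finishes each batch when the server
 * responds.
 */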
4480 static void cifs_readahead(struct readahead_control *ractl)
4481 {
4482         struct cifsFileInfo *open_file = ractl->file->private_data;
4483         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4484         struct TCP_Server_Info *server;
4485         unsigned int xid, nr_pages, cache_nr_pages = 0;
4486         unsigned int ra_pages;
4487         pgoff_t next_cached = ULONG_MAX, ra_index;
4488         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4489                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4490         bool check_cache = caching;
4491         pid_t pid;
4492         int rc = 0;
4493
4494         /* Note that readahead_count() lags behind our dequeuing of pages
4495          * from the ractl, so we have to keep track for ourselves.
4496          */
4497         ra_pages = readahead_count(ractl);
4498         ra_index = readahead_index(ractl);
4499
4500         xid = get_xid();
4501
4502         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4503                 pid = open_file->pid;
4504         else
4505                 pid = current->tgid;
4506
4507         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4508
4509         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4510                  __func__, ractl->file, ractl->mapping, ra_pages);
4511
4512         /*
4513          * Chop the readahead request up into rsize-sized read requests.
4514          */
4515         while ((nr_pages = ra_pages)) {
4516                 unsigned int i, rsize;
4517                 struct cifs_readdata *rdata;
4518                 struct cifs_credits credits_on_stack;
4519                 struct cifs_credits *credits = &credits_on_stack;
4520                 struct folio *folio;
4521                 pgoff_t fsize;
4522
4523                 /*
4524                  * Find out if we have anything cached in the range of
4525                  * interest, and if so, where the next chunk of cached data is.
4526                  */
4527                 if (caching) {
4528                         if (check_cache) {
4529                                 rc = cifs_fscache_query_occupancy(
4530                                         ractl->mapping->host, ra_index, nr_pages,
4531                                         &next_cached, &cache_nr_pages);
4532                                 if (rc < 0)
4533                                         caching = false;
4534                                 check_cache = false;
4535                         }
4536
4537                         if (ra_index == next_cached) {
4538                                 /*
4539                                  * TODO: Send a whole batch of pages to be read
4540                                  * by the cache.
4541                                  */
4542                                 folio = readahead_folio(ractl);
4543                                 fsize = folio_nr_pages(folio);
4544                                 ra_pages -= fsize;
4545                                 ra_index += fsize;
4546                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4547                                                                &folio->page) < 0) {
4548                                         /*
4549                                          * TODO: Deal with cache read failure
4550                                          * here, but for the moment, delegate
4551                                          * that to readpage.
4552                                          */
4553                                         caching = false;
4554                                 }
4555                                 folio_unlock(folio);
4556                                 next_cached += fsize;
4557                                 cache_nr_pages -= fsize;
4558                                 if (cache_nr_pages == 0)
4559                                         check_cache = true;
4560                                 continue;
4561                         }
4562                 }
4563
4564                 if (open_file->invalidHandle) {
4565                         rc = cifs_reopen_file(open_file, true);
4566                         if (rc) {
4567                                 if (rc == -EAGAIN)
4568                                         continue;
4569                                 break;
4570                         }
4571                 }
4572
4573                 if (cifs_sb->ctx->rsize == 0)
4574                         cifs_sb->ctx->rsize =
4575                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4576                                                              cifs_sb->ctx);
4577
4578                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4579                                                    &rsize, credits);
4580                 if (rc)
4581                         break;
4582                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4583                 if (next_cached != ULONG_MAX)
4584                         nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4585
4586                 /*
4587                  * Give up immediately if rsize is too small to read an
4588                  * entire page. The VFS will fall back to readpage. We
4589                  * should never reach this point, however, since we set
4590                  * ra_pages to 0 when rsize is smaller than a cache page.
4591                  */
4592                 if (unlikely(!nr_pages)) {
4593                         add_credits_and_wake_if(server, credits, 0);
4594                         break;
4595                 }
4596
4597                 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4598                 if (!rdata) {
4599                         /* best to give up if we're out of mem */
4600                         add_credits_and_wake_if(server, credits, 0);
4601                         break;
4602                 }
4603
4604                 rdata->offset   = ra_index * PAGE_SIZE;
4605                 rdata->bytes    = nr_pages * PAGE_SIZE;
4606                 rdata->cfile    = cifsFileInfo_get(open_file);
4607                 rdata->server   = server;
4608                 rdata->mapping  = ractl->mapping;
4609                 rdata->pid      = pid;
4610                 rdata->credits  = credits_on_stack;
4611
4612                 for (i = 0; i < nr_pages; i++) {
4613                         if (!readahead_folio(ractl))
4614                                 WARN_ON(1);
4615                 }
4616                 ra_pages -= nr_pages;
4617                 ra_index += nr_pages;
4618
4619                 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4620                                 rdata->offset, rdata->bytes);
4621
4622                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4623                 if (!rc) {
4624                         if (rdata->cfile->invalidHandle)
4625                                 rc = -EAGAIN;
4626                         else
4627                                 rc = server->ops->async_readv(rdata);
4628                 }
4629
4630                 if (rc) {
4631                         add_credits_and_wake_if(server, &rdata->credits, 0);
4632                         cifs_unlock_folios(rdata->mapping,
4633                                            rdata->offset / PAGE_SIZE,
4634                                            (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4635                         /* Fall back to readpage in error/reconnect cases */
4636                         kref_put(&rdata->refcount, cifs_readdata_release);
4637                         break;
4638                 }
4639
4640                 kref_put(&rdata->refcount, cifs_readdata_release);
4641         }
4642
4643         free_xid(xid);
4644 }
4645
4646 /*
4647  * cifs_readpage_worker must be called with the page pinned
4648  */
4649 static int cifs_readpage_worker(struct file *file, struct page *page,
4650         loff_t *poffset)
4651 {
4652         struct inode *inode = file_inode(file);
4653         struct timespec64 atime, mtime;
4654         char *read_data;
4655         int rc;
4656
4657         /* Is the page cached? */
4658         rc = cifs_readpage_from_fscache(inode, page);
4659         if (rc == 0)
4660                 goto read_complete;
4661
4662         read_data = kmap(page);
4663         /* for reads over a certain size we could initiate async readahead */
4664
4665         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4666
4667         if (rc < 0)
4668                 goto io_error;
4669         else
4670                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4671
4672         /* we do not want atime to be less than mtime; it broke some apps */
4673         atime = inode_set_atime_to_ts(inode, current_time(inode));
4674         mtime = inode_get_mtime(inode);
4675         if (timespec64_compare(&atime, &mtime) < 0)
4676                 inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4677
4678         if (PAGE_SIZE > rc)
4679                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4680
4681         flush_dcache_page(page);
4682         SetPageUptodate(page);
4683         rc = 0;
4684
4685 io_error:
4686         kunmap(page);
4687
4688 read_complete:
4689         unlock_page(page);
4690         return rc;
4691 }
4692
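/*
 * ->read_folio() implementation: read a single folio, trying fscache
 * first in cifs_readpage_worker() and falling back to a synchronous
 * cifs_read() from the server.
 */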
4693 static int cifs_read_folio(struct file *file, struct folio *folio)
4694 {
4695         struct page *page = &folio->page;
4696         loff_t offset = page_file_offset(page);
4697         int rc = -EACCES;
4698         unsigned int xid;
4699
4700         xid = get_xid();
4701
4702         if (file->private_data == NULL) {
4703                 rc = -EBADF;
4704                 free_xid(xid);
4705                 return rc;
4706         }
4707
4708         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4709                  page, (int)offset, (int)offset);
4710
4711         rc = cifs_readpage_worker(file, page, &offset);
4712
4713         free_xid(xid);
4714         return rc;
4715 }
4716
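/* Return 1 if any open handle on this inode was opened with FMODE_WRITE. */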
4717 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4718 {
4719         struct cifsFileInfo *open_file;
4720
4721         spin_lock(&cifs_inode->open_file_lock);
4722         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4723                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4724                         spin_unlock(&cifs_inode->open_file_lock);
4725                         return 1;
4726                 }
4727         }
4728         spin_unlock(&cifs_inode->open_file_lock);
4729         return 0;
4730 }
4731
4732 /* We do not want to update the file size from the server for inodes
4733  * open for write, to avoid races with writepage extending the file.
4734  * In the future we could consider refreshing the inode only on file
4735  * size increases, but that is tricky to do without racing with
4736  * writebehind page caching in the current Linux kernel design.
4737  */
4738 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4739 {
4740         if (!cifsInode)
4741                 return true;
4742
4743         if (is_inode_writable(cifsInode)) {
4744                 /* This inode is open for write at least once */
4745                 struct cifs_sb_info *cifs_sb;
4746
4747                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4748                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4749                         /* since there is no page cache to corrupt on
4750                            direct I/O, we can change the size safely */
4751                         return true;
4752                 }
4753
4754                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4755                         return true;
4756
4757                 return false;
4758         } else
4759                 return true;
4760 }
4761
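/*
 * Prepare a pagecache page for a buffered write.  If the page is not
 * already uptodate and the write does not cover the whole page, either
 * zero the parts we will not touch (when we hold a read oplock and the
 * page sits at or beyond the old EOF) or read the page in once via
 * cifs_readpage_worker() before letting the copy proceed.
 */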
4762 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4763                         loff_t pos, unsigned len,
4764                         struct page **pagep, void **fsdata)
4765 {
4766         int oncethru = 0;
4767         pgoff_t index = pos >> PAGE_SHIFT;
4768         loff_t offset = pos & (PAGE_SIZE - 1);
4769         loff_t page_start = pos & PAGE_MASK;
4770         loff_t i_size;
4771         struct page *page;
4772         int rc = 0;
4773
4774         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4775
4776 start:
4777         page = grab_cache_page_write_begin(mapping, index);
4778         if (!page) {
4779                 rc = -ENOMEM;
4780                 goto out;
4781         }
4782
4783         if (PageUptodate(page))
4784                 goto out;
4785
4786         /*
4787          * If we write a full page it will be up to date, no need to read from
4788          * the server. If the write is short, we'll end up doing a sync write
4789          * instead.
4790          */
4791         if (len == PAGE_SIZE)
4792                 goto out;
4793
4794         /*
4795          * optimize away the read when we have an oplock, and we're not
4796          * expecting to use any of the data we'd be reading in. That
4797          * is, when the page lies beyond the EOF, or straddles the EOF
4798          * and the write will cover all of the existing data.
4799          */
4800         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4801                 i_size = i_size_read(mapping->host);
4802                 if (page_start >= i_size ||
4803                     (offset == 0 && (pos + len) >= i_size)) {
4804                         zero_user_segments(page, 0, offset,
4805                                            offset + len,
4806                                            PAGE_SIZE);
4807                         /*
4808                          * PageChecked means that the parts of the page
4809                          * to which we're not writing are considered up
4810                          * to date. Once the data is copied to the
4811                          * page, it can be set uptodate.
4812                          */
4813                         SetPageChecked(page);
4814                         goto out;
4815                 }
4816         }
4817
4818         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4819                 /*
4820                  * Might as well read a page; it is fast enough. If we get
4821                  * an error, we don't need to return it: cifs_write_end will
4822                  * do a sync write instead since PG_uptodate isn't set.
4823                  */
4824                 cifs_readpage_worker(file, page, &page_start);
4825                 put_page(page);
4826                 oncethru = 1;
4827                 goto start;
4828         } else {
4829                 /* We could try using another file handle if there is one,
4830                    but how would we lock it to prevent a close of that
4831                    handle racing with this read? In any case this will be
4832                    written out by write_end, so it is fine. */
4833         }
4834 out:
4835         *pagep = page;
4836         return rc;
4837 }
4838
4839 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4840 {
4841         if (folio_test_private(folio))
4842                 return false;
4843         if (folio_test_fscache(folio)) {
4844                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4845                         return false;
4846                 folio_wait_fscache(folio);
4847         }
4848         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4849         return true;
4850 }
4851
4852 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4853                                  size_t length)
4854 {
4855         folio_wait_fscache(folio);
4856 }
4857
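/*
 * Write a dirty folio back synchronously and wait for any fscache write
 * to it to finish; called (via ->launder_folio) before the folio is
 * invalidated, e.g. from invalidate_inode_pages2().
 */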
4858 static int cifs_launder_folio(struct folio *folio)
4859 {
4860         int rc = 0;
4861         loff_t range_start = folio_pos(folio);
4862         loff_t range_end = range_start + folio_size(folio);
4863         struct writeback_control wbc = {
4864                 .sync_mode = WB_SYNC_ALL,
4865                 .nr_to_write = 0,
4866                 .range_start = range_start,
4867                 .range_end = range_end,
4868         };
4869
4870         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4871
4872         if (folio_clear_dirty_for_io(folio))
4873                 rc = cifs_writepage_locked(&folio->page, &wbc);
4874
4875         folio_wait_fscache(folio);
4876         return rc;
4877 }
4878
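/*
 * Worker run when the server sends an oplock (or lease) break.  It waits
 * for pending writers, downgrades the cached oplock state, flushes and,
 * if the read cache is lost, zaps the pagecache, pushes any byte-range
 * locks to the server, and finally acknowledges the break unless the
 * file has already been closed.
 */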
4879 void cifs_oplock_break(struct work_struct *work)
4880 {
4881         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4882                                                   oplock_break);
4883         struct inode *inode = d_inode(cfile->dentry);
4884         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4885         struct cifsInodeInfo *cinode = CIFS_I(inode);
4886         struct cifs_tcon *tcon;
4887         struct TCP_Server_Info *server;
4888         struct tcon_link *tlink;
4889         int rc = 0;
4890         bool purge_cache = false, oplock_break_cancelled;
4891         __u64 persistent_fid, volatile_fid;
4892         __u16 net_fid;
4893
4894         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4895                         TASK_UNINTERRUPTIBLE);
4896
4897         tlink = cifs_sb_tlink(cifs_sb);
4898         if (IS_ERR(tlink))
4899                 goto out;
4900         tcon = tlink_tcon(tlink);
4901         server = tcon->ses->server;
4902
4903         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4904                                       cfile->oplock_epoch, &purge_cache);
4905
4906         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4907                                                 cifs_has_mand_locks(cinode)) {
4908                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4909                          inode);
4910                 cinode->oplock = 0;
4911         }
4912
4913         if (inode && S_ISREG(inode->i_mode)) {
4914                 if (CIFS_CACHE_READ(cinode))
4915                         break_lease(inode, O_RDONLY);
4916                 else
4917                         break_lease(inode, O_WRONLY);
4918                 rc = filemap_fdatawrite(inode->i_mapping);
4919                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4920                         rc = filemap_fdatawait(inode->i_mapping);
4921                         mapping_set_error(inode->i_mapping, rc);
4922                         cifs_zap_mapping(inode);
4923                 }
4924                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4925                 if (CIFS_CACHE_WRITE(cinode))
4926                         goto oplock_break_ack;
4927         }
4928
4929         rc = cifs_push_locks(cfile);
4930         if (rc)
4931                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4932
4933 oplock_break_ack:
4934         /*
4935          * When an oplock break is received and there are no active file
4936          * handles, only cached ones, schedule the deferred close
4937          * immediately so that a new open will not use the cached handle.
4938          */
4939
4940         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
4941                 cifs_close_deferred_file(cinode);
4942
4943         persistent_fid = cfile->fid.persistent_fid;
4944         volatile_fid = cfile->fid.volatile_fid;
4945         net_fid = cfile->fid.netfid;
4946         oplock_break_cancelled = cfile->oplock_break_cancelled;
4947
4948         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4949         /*
4950          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
4951          * an acknowledgment to be sent when the file has already been closed.
4952          */
4953         spin_lock(&cinode->open_file_lock);
4954         /* check the list is empty; this can race with kill_sb calling tree disconnect */
4955         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
4956                 spin_unlock(&cinode->open_file_lock);
4957                 rc = server->ops->oplock_response(tcon, persistent_fid,
4958                                                   volatile_fid, net_fid, cinode);
4959                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4960         } else
4961                 spin_unlock(&cinode->open_file_lock);
4962
4963         cifs_put_tlink(tlink);
4964 out:
4965         cifs_done_oplock_break(cinode);
4966 }
4967
4968 /*
4969  * The presence of cifs_direct_io() in the address space ops vector
4970  * allows open() with the O_DIRECT flag, which would otherwise fail.
4971  *
4972  * In the non-cached mode (mount with cache=none), we shunt off direct
4973  * read and write requests, so this method should never be called.
4974  *
4975  * Direct I/O is not yet supported in the cached mode.
4976  */
4977 static ssize_t
4978 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4979 {
4980         /*
4981          * FIXME
4982          * Eventually need to support direct IO for non forcedirectio mounts
4983          */
4984         return -EINVAL;
4985 }
4986
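/*
 * Allow a file on a CIFS mount to be used as swap.  The checks below
 * require that the address_space provides swap_rw and that the file has
 * no holes (i_blocks * 512 must cover i_size), since the whole file is
 * mapped up front as a single swap extent.
 */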
4987 static int cifs_swap_activate(struct swap_info_struct *sis,
4988                               struct file *swap_file, sector_t *span)
4989 {
4990         struct cifsFileInfo *cfile = swap_file->private_data;
4991         struct inode *inode = swap_file->f_mapping->host;
4992         unsigned long blocks;
4993         long long isize;
4994
4995         cifs_dbg(FYI, "swap activate\n");
4996
4997         if (!swap_file->f_mapping->a_ops->swap_rw)
4998                 /* Cannot support swap */
4999                 return -EINVAL;
5000
5001         spin_lock(&inode->i_lock);
5002         blocks = inode->i_blocks;
5003         isize = inode->i_size;
5004         spin_unlock(&inode->i_lock);
5005         if (blocks * 512 < isize) {
5006                 pr_warn("swap activate: swapfile has holes\n");
5007                 return -EINVAL;
5008         }
5009         *span = sis->pages;
5010
5011         pr_warn_once("Swap support over SMB3 is experimental\n");
5012
5013         /*
5014          * TODO: consider adding ACL (or documenting how) to prevent other
5015          * users (on this or other systems) from reading it
5016          */
5017
5018
5019         /* TODO: add sk_set_memalloc(inet) or similar */
5020
5021         if (cfile)
5022                 cfile->swapfile = true;
5023         /*
5024          * TODO: Since file already open, we can't open with DENY_ALL here
5025          * but we could add call to grab a byte range lock to prevent others
5026          * from reading or writing the file
5027          */
5028
5029         sis->flags |= SWP_FS_OPS;
5030         return add_swap_extent(sis, 0, sis->max, 0);
5031 }
5032
5033 static void cifs_swap_deactivate(struct file *file)
5034 {
5035         struct cifsFileInfo *cfile = file->private_data;
5036
5037         cifs_dbg(FYI, "swap deactivate\n");
5038
5039         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5040
5041         if (cfile)
5042                 cfile->swapfile = false;
5043
5044         /* do we need to unpin (or unlock) the file */
5045 }
5046
5047 const struct address_space_operations cifs_addr_ops = {
5048         .read_folio = cifs_read_folio,
5049         .readahead = cifs_readahead,
5050         .writepages = cifs_writepages,
5051         .write_begin = cifs_write_begin,
5052         .write_end = cifs_write_end,
5053         .dirty_folio = netfs_dirty_folio,
5054         .release_folio = cifs_release_folio,
5055         .direct_IO = cifs_direct_io,
5056         .invalidate_folio = cifs_invalidate_folio,
5057         .launder_folio = cifs_launder_folio,
5058         .migrate_folio = filemap_migrate_folio,
5059         /*
5060          * TODO: investigate and if useful we could add an is_dirty_writeback
5061          * helper if needed
5062          */
5063         .swap_activate = cifs_swap_activate,
5064         .swap_deactivate = cifs_swap_deactivate,
5065 };
5066
5067 /*
5068  * cifs_readahead requires the server to support a buffer large enough to
5069  * contain the header plus one complete page of data.  Otherwise, we need
5070  * to leave cifs_readahead out of the address space operations.
5071  */
5072 const struct address_space_operations cifs_addr_ops_smallbuf = {
5073         .read_folio = cifs_read_folio,
5074         .writepages = cifs_writepages,
5075         .write_begin = cifs_write_begin,
5076         .write_end = cifs_write_end,
5077         .dirty_folio = netfs_dirty_folio,
5078         .release_folio = cifs_release_folio,
5079         .invalidate_folio = cifs_invalidate_folio,
5080         .launder_folio = cifs_launder_folio,
5081         .migrate_folio = filemap_migrate_folio,
5082 };