Merge tag 'driver-core-6.9-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / fs / smb / client / file.c
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45         struct address_space *mapping = inode->i_mapping;
46         struct folio *folio;
47         pgoff_t end;
48
49         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50
51         rcu_read_lock();
52
53         end = (start + len - 1) / PAGE_SIZE;
54         xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55                 if (xas_retry(&xas, folio))
56                         continue;
57                 xas_pause(&xas);
58                 rcu_read_unlock();
59                 folio_lock(folio);
60                 folio_clear_dirty_for_io(folio);
61                 folio_unlock(folio);
62                 rcu_read_lock();
63         }
64
65         rcu_read_unlock();
66 }
67
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73         struct address_space *mapping = inode->i_mapping;
74         struct folio *folio;
75         pgoff_t end;
76
77         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78
79         if (!len)
80                 return;
81
82         rcu_read_lock();
83
84         end = (start + len - 1) / PAGE_SIZE;
85         xas_for_each(&xas, folio, end) {
86                 if (xas_retry(&xas, folio))
87                         continue;
88                 if (!folio_test_writeback(folio)) {
89                         WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90                                   len, start, folio->index, end);
91                         continue;
92                 }
93
94                 folio_detach_private(folio);
95                 folio_end_writeback(folio);
96         }
97
98         rcu_read_unlock();
99 }
100
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106         struct address_space *mapping = inode->i_mapping;
107         struct folio *folio;
108         pgoff_t end;
109
110         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111
112         if (!len)
113                 return;
114
115         rcu_read_lock();
116
117         end = (start + len - 1) / PAGE_SIZE;
118         xas_for_each(&xas, folio, end) {
119                 if (xas_retry(&xas, folio))
120                         continue;
121                 if (!folio_test_writeback(folio)) {
122                         WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123                                   len, start, folio->index, end);
124                         continue;
125                 }
126
127                 folio_set_error(folio);
128                 folio_end_writeback(folio);
129         }
130
131         rcu_read_unlock();
132 }
133
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139         struct address_space *mapping = inode->i_mapping;
140         struct folio *folio;
141         pgoff_t end;
142
143         XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144
145         if (!len)
146                 return;
147
148         rcu_read_lock();
149
150         end = (start + len - 1) / PAGE_SIZE;
151         xas_for_each(&xas, folio, end) {
152                 if (!folio_test_writeback(folio)) {
153                         WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154                                   len, start, folio->index, end);
155                         continue;
156                 }
157
158                 filemap_dirty_folio(folio->mapping, folio);
159                 folio_end_writeback(folio);
160         }
161
162         rcu_read_unlock();
163 }
164
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172         struct cifsFileInfo *open_file = NULL;
173         struct list_head *tmp;
174         struct list_head *tmp1;
175
176         /* only send once per connect */
177         spin_lock(&tcon->tc_lock);
178         if (tcon->need_reconnect)
179                 tcon->status = TID_NEED_RECON;
180
181         if (tcon->status != TID_NEED_RECON) {
182                 spin_unlock(&tcon->tc_lock);
183                 return;
184         }
185         tcon->status = TID_IN_FILES_INVALIDATE;
186         spin_unlock(&tcon->tc_lock);
187
188         /* list all files open on tree connection and mark them invalid */
189         spin_lock(&tcon->open_file_lock);
190         list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192                 open_file->invalidHandle = true;
193                 open_file->oplock_break_cancelled = true;
194         }
195         spin_unlock(&tcon->open_file_lock);
196
197         invalidate_all_cached_dirs(tcon);
198         spin_lock(&tcon->tc_lock);
199         if (tcon->status == TID_IN_FILES_INVALIDATE)
200                 tcon->status = TID_NEED_TCON;
201         spin_unlock(&tcon->tc_lock);
202
203         /*
204          * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205          * to this tcon.
206          */
207 }
208
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211         if ((flags & O_ACCMODE) == O_RDONLY)
212                 return GENERIC_READ;
213         else if ((flags & O_ACCMODE) == O_WRONLY)
214                 return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215         else if ((flags & O_ACCMODE) == O_RDWR) {
216                 /* GENERIC_ALL is too much permission to request
217                    can cause unnecessary access denied on create */
218                 /* return GENERIC_ALL; */
219                 return (GENERIC_READ | GENERIC_WRITE);
220         }
221
222         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224                 FILE_READ_DATA);
225 }
226
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230         u32 posix_flags = 0;
231
232         if ((flags & O_ACCMODE) == O_RDONLY)
233                 posix_flags = SMB_O_RDONLY;
234         else if ((flags & O_ACCMODE) == O_WRONLY)
235                 posix_flags = SMB_O_WRONLY;
236         else if ((flags & O_ACCMODE) == O_RDWR)
237                 posix_flags = SMB_O_RDWR;
238
239         if (flags & O_CREAT) {
240                 posix_flags |= SMB_O_CREAT;
241                 if (flags & O_EXCL)
242                         posix_flags |= SMB_O_EXCL;
243         } else if (flags & O_EXCL)
244                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245                          current->comm, current->tgid);
246
247         if (flags & O_TRUNC)
248                 posix_flags |= SMB_O_TRUNC;
249         /* be safe and imply O_SYNC for O_DSYNC */
250         if (flags & O_DSYNC)
251                 posix_flags |= SMB_O_SYNC;
252         if (flags & O_DIRECTORY)
253                 posix_flags |= SMB_O_DIRECTORY;
254         if (flags & O_NOFOLLOW)
255                 posix_flags |= SMB_O_NOFOLLOW;
256         if (flags & O_DIRECT)
257                 posix_flags |= SMB_O_DIRECT;
258
259         return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266                 return FILE_CREATE;
267         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268                 return FILE_OVERWRITE_IF;
269         else if ((flags & O_CREAT) == O_CREAT)
270                 return FILE_OPEN_IF;
271         else if ((flags & O_TRUNC) == O_TRUNC)
272                 return FILE_OVERWRITE;
273         else
274                 return FILE_OPEN;
275 }
276
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279                         struct super_block *sb, int mode, unsigned int f_flags,
280                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282         int rc;
283         FILE_UNIX_BASIC_INFO *presp_data;
284         __u32 posix_flags = 0;
285         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286         struct cifs_fattr fattr;
287         struct tcon_link *tlink;
288         struct cifs_tcon *tcon;
289
290         cifs_dbg(FYI, "posix open %s\n", full_path);
291
292         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293         if (presp_data == NULL)
294                 return -ENOMEM;
295
296         tlink = cifs_sb_tlink(cifs_sb);
297         if (IS_ERR(tlink)) {
298                 rc = PTR_ERR(tlink);
299                 goto posix_open_ret;
300         }
301
302         tcon = tlink_tcon(tlink);
303         mode &= ~current_umask();
304
305         posix_flags = cifs_posix_convert_flags(f_flags);
306         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307                              poplock, full_path, cifs_sb->local_nls,
308                              cifs_remap(cifs_sb));
309         cifs_put_tlink(tlink);
310
311         if (rc)
312                 goto posix_open_ret;
313
314         if (presp_data->Type == cpu_to_le32(-1))
315                 goto posix_open_ret; /* open ok, caller does qpathinfo */
316
317         if (!pinode)
318                 goto posix_open_ret; /* caller does not need info */
319
320         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321
322         /* get new inode and set it up */
323         if (*pinode == NULL) {
324                 cifs_fill_uniqueid(sb, &fattr);
325                 *pinode = cifs_iget(sb, &fattr);
326                 if (!*pinode) {
327                         rc = -ENOMEM;
328                         goto posix_open_ret;
329                 }
330         } else {
331                 cifs_revalidate_mapping(*pinode);
332                 rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333         }
334
335 posix_open_ret:
336         kfree(presp_data);
337         return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342                         struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343                         struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345         int rc;
346         int desired_access;
347         int disposition;
348         int create_options = CREATE_NOT_DIR;
349         struct TCP_Server_Info *server = tcon->ses->server;
350         struct cifs_open_parms oparms;
351         int rdwr_for_fscache = 0;
352
353         if (!server->ops->open)
354                 return -ENOSYS;
355
356         /* If we're caching, we need to be able to fill in around partial writes. */
357         if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358                 rdwr_for_fscache = 1;
359
360         desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *      POSIX Flag            CIFS Disposition
366  *      ----------            ----------------
367  *      O_CREAT               FILE_OPEN_IF
368  *      O_CREAT | O_EXCL      FILE_CREATE
369  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *      O_TRUNC               FILE_OVERWRITE
371  *      none of the above     FILE_OPEN
372  *
373  *      Note that there is not a direct match between disposition
374  *      FILE_SUPERSEDE (ie create whether or not file exists although
375  *      O_CREAT | O_TRUNC is similar but truncates the existing
376  *      file rather than creating a new file as FILE_SUPERSEDE does
377  *      (which uses the attributes / metadata passed in on open call)
378  *?
379  *?  O_SYNC is a reasonable match to CIFS writethrough flag
380  *?  and the read write flags match reasonably.  O_LARGEFILE
381  *?  is irrelevant because largefile support is always used
382  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
383  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
384  *********************************************************************/
385
386         disposition = cifs_get_disposition(f_flags);
387
388         /* BB pass O_SYNC flag through on file attributes .. BB */
389
390         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
391         if (f_flags & O_SYNC)
392                 create_options |= CREATE_WRITE_THROUGH;
393
394         if (f_flags & O_DIRECT)
395                 create_options |= CREATE_NO_BUFFER;
396
397 retry_open:
398         oparms = (struct cifs_open_parms) {
399                 .tcon = tcon,
400                 .cifs_sb = cifs_sb,
401                 .desired_access = desired_access,
402                 .create_options = cifs_create_options(cifs_sb, create_options),
403                 .disposition = disposition,
404                 .path = full_path,
405                 .fid = fid,
406         };
407
408         rc = server->ops->open(xid, &oparms, oplock, buf);
409         if (rc) {
410                 if (rc == -EACCES && rdwr_for_fscache == 1) {
411                         desired_access = cifs_convert_flags(f_flags, 0);
412                         rdwr_for_fscache = 2;
413                         goto retry_open;
414                 }
415                 return rc;
416         }
417         if (rdwr_for_fscache == 2)
418                 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419
420         /* TODO: Add support for calling posix query info but with passing in fid */
421         if (tcon->unix_ext)
422                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423                                               xid);
424         else
425                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426                                          xid, fid);
427
428         if (rc) {
429                 server->ops->close(xid, tcon, fid);
430                 if (rc == -ESTALE)
431                         rc = -EOPENSTALE;
432         }
433
434         return rc;
435 }
436
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440         struct cifs_fid_locks *cur;
441         bool has_locks = false;
442
443         down_read(&cinode->lock_sem);
444         list_for_each_entry(cur, &cinode->llist, llist) {
445                 if (!list_empty(&cur->locks)) {
446                         has_locks = true;
447                         break;
448                 }
449         }
450         up_read(&cinode->lock_sem);
451         return has_locks;
452 }
453
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457         while (!down_write_trylock(sem))
458                 msleep(10);
459 }
460
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465                                        struct tcon_link *tlink, __u32 oplock,
466                                        const char *symlink_target)
467 {
468         struct dentry *dentry = file_dentry(file);
469         struct inode *inode = d_inode(dentry);
470         struct cifsInodeInfo *cinode = CIFS_I(inode);
471         struct cifsFileInfo *cfile;
472         struct cifs_fid_locks *fdlocks;
473         struct cifs_tcon *tcon = tlink_tcon(tlink);
474         struct TCP_Server_Info *server = tcon->ses->server;
475
476         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477         if (cfile == NULL)
478                 return cfile;
479
480         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481         if (!fdlocks) {
482                 kfree(cfile);
483                 return NULL;
484         }
485
486         if (symlink_target) {
487                 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488                 if (!cfile->symlink_target) {
489                         kfree(fdlocks);
490                         kfree(cfile);
491                         return NULL;
492                 }
493         }
494
495         INIT_LIST_HEAD(&fdlocks->locks);
496         fdlocks->cfile = cfile;
497         cfile->llist = fdlocks;
498
499         cfile->count = 1;
500         cfile->pid = current->tgid;
501         cfile->uid = current_fsuid();
502         cfile->dentry = dget(dentry);
503         cfile->f_flags = file->f_flags;
504         cfile->invalidHandle = false;
505         cfile->deferred_close_scheduled = false;
506         cfile->tlink = cifs_get_tlink(tlink);
507         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509         INIT_WORK(&cfile->serverclose, serverclose_work);
510         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511         mutex_init(&cfile->fh_mutex);
512         spin_lock_init(&cfile->file_info_lock);
513
514         cifs_sb_active(inode->i_sb);
515
516         /*
517          * If the server returned a read oplock and we have mandatory brlocks,
518          * set oplock level to None.
519          */
520         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522                 oplock = 0;
523         }
524
525         cifs_down_write(&cinode->lock_sem);
526         list_add(&fdlocks->llist, &cinode->llist);
527         up_write(&cinode->lock_sem);
528
529         spin_lock(&tcon->open_file_lock);
530         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531                 oplock = fid->pending_open->oplock;
532         list_del(&fid->pending_open->olist);
533
534         fid->purge_cache = false;
535         server->ops->set_fid(cfile, fid, oplock);
536
537         list_add(&cfile->tlist, &tcon->openFileList);
538         atomic_inc(&tcon->num_local_opens);
539
540         /* if readable file instance put first in list*/
541         spin_lock(&cinode->open_file_lock);
542         if (file->f_mode & FMODE_READ)
543                 list_add(&cfile->flist, &cinode->openFileList);
544         else
545                 list_add_tail(&cfile->flist, &cinode->openFileList);
546         spin_unlock(&cinode->open_file_lock);
547         spin_unlock(&tcon->open_file_lock);
548
549         if (fid->purge_cache)
550                 cifs_zap_mapping(inode);
551
552         file->private_data = cfile;
553         return cfile;
554 }
555
556 struct cifsFileInfo *
557 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558 {
559         spin_lock(&cifs_file->file_info_lock);
560         cifsFileInfo_get_locked(cifs_file);
561         spin_unlock(&cifs_file->file_info_lock);
562         return cifs_file;
563 }
564
565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566 {
567         struct inode *inode = d_inode(cifs_file->dentry);
568         struct cifsInodeInfo *cifsi = CIFS_I(inode);
569         struct cifsLockInfo *li, *tmp;
570         struct super_block *sb = inode->i_sb;
571
572         /*
573          * Delete any outstanding lock records. We'll lose them when the file
574          * is closed anyway.
575          */
576         cifs_down_write(&cifsi->lock_sem);
577         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578                 list_del(&li->llist);
579                 cifs_del_lock_waiters(li);
580                 kfree(li);
581         }
582         list_del(&cifs_file->llist->llist);
583         kfree(cifs_file->llist);
584         up_write(&cifsi->lock_sem);
585
586         cifs_put_tlink(cifs_file->tlink);
587         dput(cifs_file->dentry);
588         cifs_sb_deactive(sb);
589         kfree(cifs_file->symlink_target);
590         kfree(cifs_file);
591 }
592
593 static void cifsFileInfo_put_work(struct work_struct *work)
594 {
595         struct cifsFileInfo *cifs_file = container_of(work,
596                         struct cifsFileInfo, put);
597
598         cifsFileInfo_put_final(cifs_file);
599 }
600
601 void serverclose_work(struct work_struct *work)
602 {
603         struct cifsFileInfo *cifs_file = container_of(work,
604                         struct cifsFileInfo, serverclose);
605
606         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607
608         struct TCP_Server_Info *server = tcon->ses->server;
609         int rc = 0;
610         int retries = 0;
611         int MAX_RETRIES = 4;
612
613         do {
614                 if (server->ops->close_getattr)
615                         rc = server->ops->close_getattr(0, tcon, cifs_file);
616                 else if (server->ops->close)
617                         rc = server->ops->close(0, tcon, &cifs_file->fid);
618
619                 if (rc == -EBUSY || rc == -EAGAIN) {
620                         retries++;
621                         msleep(250);
622                 }
623         } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624         );
625
626         if (retries == MAX_RETRIES)
627                 pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628
629         if (cifs_file->offload)
630                 queue_work(fileinfo_put_wq, &cifs_file->put);
631         else
632                 cifsFileInfo_put_final(cifs_file);
633 }
634
635 /**
636  * cifsFileInfo_put - release a reference of file priv data
637  *
638  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639  *
640  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
641  */
642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643 {
644         _cifsFileInfo_put(cifs_file, true, true);
645 }
646
647 /**
648  * _cifsFileInfo_put - release a reference of file priv data
649  *
650  * This may involve closing the filehandle @cifs_file out on the
651  * server. Must be called without holding tcon->open_file_lock,
652  * cinode->open_file_lock and cifs_file->file_info_lock.
653  *
654  * If @wait_for_oplock_handler is true and we are releasing the last
655  * reference, wait for any running oplock break handler of the file
656  * and cancel any pending one.
657  *
658  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
659  * @wait_oplock_handler: must be false if called from oplock_break_handler
660  * @offload:    not offloaded on close and oplock breaks
661  *
662  */
663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664                        bool wait_oplock_handler, bool offload)
665 {
666         struct inode *inode = d_inode(cifs_file->dentry);
667         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668         struct TCP_Server_Info *server = tcon->ses->server;
669         struct cifsInodeInfo *cifsi = CIFS_I(inode);
670         struct super_block *sb = inode->i_sb;
671         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672         struct cifs_fid fid = {};
673         struct cifs_pending_open open;
674         bool oplock_break_cancelled;
675         bool serverclose_offloaded = false;
676
677         spin_lock(&tcon->open_file_lock);
678         spin_lock(&cifsi->open_file_lock);
679         spin_lock(&cifs_file->file_info_lock);
680
681         cifs_file->offload = offload;
682         if (--cifs_file->count > 0) {
683                 spin_unlock(&cifs_file->file_info_lock);
684                 spin_unlock(&cifsi->open_file_lock);
685                 spin_unlock(&tcon->open_file_lock);
686                 return;
687         }
688         spin_unlock(&cifs_file->file_info_lock);
689
690         if (server->ops->get_lease_key)
691                 server->ops->get_lease_key(inode, &fid);
692
693         /* store open in pending opens to make sure we don't miss lease break */
694         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695
696         /* remove it from the lists */
697         list_del(&cifs_file->flist);
698         list_del(&cifs_file->tlist);
699         atomic_dec(&tcon->num_local_opens);
700
701         if (list_empty(&cifsi->openFileList)) {
702                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
703                          d_inode(cifs_file->dentry));
704                 /*
705                  * In strict cache mode we need invalidate mapping on the last
706                  * close  because it may cause a error when we open this file
707                  * again and get at least level II oplock.
708                  */
709                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711                 cifs_set_oplock_level(cifsi, 0);
712         }
713
714         spin_unlock(&cifsi->open_file_lock);
715         spin_unlock(&tcon->open_file_lock);
716
717         oplock_break_cancelled = wait_oplock_handler ?
718                 cancel_work_sync(&cifs_file->oplock_break) : false;
719
720         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721                 struct TCP_Server_Info *server = tcon->ses->server;
722                 unsigned int xid;
723                 int rc = 0;
724
725                 xid = get_xid();
726                 if (server->ops->close_getattr)
727                         rc = server->ops->close_getattr(xid, tcon, cifs_file);
728                 else if (server->ops->close)
729                         rc = server->ops->close(xid, tcon, &cifs_file->fid);
730                 _free_xid(xid);
731
732                 if (rc == -EBUSY || rc == -EAGAIN) {
733                         // Server close failed, hence offloading it as an async op
734                         queue_work(serverclose_wq, &cifs_file->serverclose);
735                         serverclose_offloaded = true;
736                 }
737         }
738
739         if (oplock_break_cancelled)
740                 cifs_done_oplock_break(cifsi);
741
742         cifs_del_pending_open(&open);
743
744         // if serverclose has been offloaded to wq (on failure), it will
745         // handle offloading put as well. If serverclose not offloaded,
746         // we need to handle offloading put here.
747         if (!serverclose_offloaded) {
748                 if (offload)
749                         queue_work(fileinfo_put_wq, &cifs_file->put);
750                 else
751                         cifsFileInfo_put_final(cifs_file);
752         }
753 }
754
755 int cifs_open(struct inode *inode, struct file *file)
756
757 {
758         int rc = -EACCES;
759         unsigned int xid;
760         __u32 oplock;
761         struct cifs_sb_info *cifs_sb;
762         struct TCP_Server_Info *server;
763         struct cifs_tcon *tcon;
764         struct tcon_link *tlink;
765         struct cifsFileInfo *cfile = NULL;
766         void *page;
767         const char *full_path;
768         bool posix_open_ok = false;
769         struct cifs_fid fid = {};
770         struct cifs_pending_open open;
771         struct cifs_open_info_data data = {};
772
773         xid = get_xid();
774
775         cifs_sb = CIFS_SB(inode->i_sb);
776         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777                 free_xid(xid);
778                 return -EIO;
779         }
780
781         tlink = cifs_sb_tlink(cifs_sb);
782         if (IS_ERR(tlink)) {
783                 free_xid(xid);
784                 return PTR_ERR(tlink);
785         }
786         tcon = tlink_tcon(tlink);
787         server = tcon->ses->server;
788
789         page = alloc_dentry_path();
790         full_path = build_path_from_dentry(file_dentry(file), page);
791         if (IS_ERR(full_path)) {
792                 rc = PTR_ERR(full_path);
793                 goto out;
794         }
795
796         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797                  inode, file->f_flags, full_path);
798
799         if (file->f_flags & O_DIRECT &&
800             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802                         file->f_op = &cifs_file_direct_nobrl_ops;
803                 else
804                         file->f_op = &cifs_file_direct_ops;
805         }
806
807         /* Get the cached handle as SMB2 close is deferred */
808         rc = cifs_get_readable_path(tcon, full_path, &cfile);
809         if (rc == 0) {
810                 if (file->f_flags == cfile->f_flags) {
811                         file->private_data = cfile;
812                         spin_lock(&CIFS_I(inode)->deferred_lock);
813                         cifs_del_deferred_close(cfile);
814                         spin_unlock(&CIFS_I(inode)->deferred_lock);
815                         goto use_cache;
816                 } else {
817                         _cifsFileInfo_put(cfile, true, false);
818                 }
819         }
820
821         if (server->oplocks)
822                 oplock = REQ_OPLOCK;
823         else
824                 oplock = 0;
825
826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827         if (!tcon->broken_posix_open && tcon->unix_ext &&
828             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830                 /* can not refresh inode info since size could be stale */
831                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832                                 cifs_sb->ctx->file_mode /* ignored */,
833                                 file->f_flags, &oplock, &fid.netfid, xid);
834                 if (rc == 0) {
835                         cifs_dbg(FYI, "posix open succeeded\n");
836                         posix_open_ok = true;
837                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838                         if (tcon->ses->serverNOS)
839                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840                                          tcon->ses->ip_addr,
841                                          tcon->ses->serverNOS);
842                         tcon->broken_posix_open = true;
843                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
844                          (rc != -EOPNOTSUPP)) /* path not found or net err */
845                         goto out;
846                 /*
847                  * Else fallthrough to retry open the old way on network i/o
848                  * or DFS errors.
849                  */
850         }
851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852
853         if (server->ops->get_lease_key)
854                 server->ops->get_lease_key(inode, &fid);
855
856         cifs_add_pending_open(&fid, tlink, &open);
857
858         if (!posix_open_ok) {
859                 if (server->ops->get_lease_key)
860                         server->ops->get_lease_key(inode, &fid);
861
862                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863                                   xid, &data);
864                 if (rc) {
865                         cifs_del_pending_open(&open);
866                         goto out;
867                 }
868         }
869
870         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871         if (cfile == NULL) {
872                 if (server->ops->close)
873                         server->ops->close(xid, tcon, &fid);
874                 cifs_del_pending_open(&open);
875                 rc = -ENOMEM;
876                 goto out;
877         }
878
879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881                 /*
882                  * Time to set mode which we can not set earlier due to
883                  * problems creating new read-only files.
884                  */
885                 struct cifs_unix_set_info_args args = {
886                         .mode   = inode->i_mode,
887                         .uid    = INVALID_UID, /* no change */
888                         .gid    = INVALID_GID, /* no change */
889                         .ctime  = NO_CHANGE_64,
890                         .atime  = NO_CHANGE_64,
891                         .mtime  = NO_CHANGE_64,
892                         .device = 0,
893                 };
894                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895                                        cfile->pid);
896         }
897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898
899 use_cache:
900         fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901                            file->f_mode & FMODE_WRITE);
902         if (!(file->f_flags & O_DIRECT))
903                 goto out;
904         if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905                 goto out;
906         cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907
908 out:
909         free_dentry_path(page);
910         free_xid(xid);
911         cifs_put_tlink(tlink);
912         cifs_free_open_info(&data);
913         return rc;
914 }
915
916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919
920 /*
921  * Try to reacquire byte range locks that were released when session
922  * to server was lost.
923  */
924 static int
925 cifs_relock_file(struct cifsFileInfo *cfile)
926 {
927         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929         int rc = 0;
930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933
934         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935         if (cinode->can_cache_brlcks) {
936                 /* can cache locks - no need to relock */
937                 up_read(&cinode->lock_sem);
938                 return rc;
939         }
940
941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942         if (cap_unix(tcon->ses) &&
943             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945                 rc = cifs_push_posix_locks(cfile);
946         else
947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
949
950         up_read(&cinode->lock_sem);
951         return rc;
952 }
953
954 static int
955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956 {
957         int rc = -EACCES;
958         unsigned int xid;
959         __u32 oplock;
960         struct cifs_sb_info *cifs_sb;
961         struct cifs_tcon *tcon;
962         struct TCP_Server_Info *server;
963         struct cifsInodeInfo *cinode;
964         struct inode *inode;
965         void *page;
966         const char *full_path;
967         int desired_access;
968         int disposition = FILE_OPEN;
969         int create_options = CREATE_NOT_DIR;
970         struct cifs_open_parms oparms;
971         int rdwr_for_fscache = 0;
972
973         xid = get_xid();
974         mutex_lock(&cfile->fh_mutex);
975         if (!cfile->invalidHandle) {
976                 mutex_unlock(&cfile->fh_mutex);
977                 free_xid(xid);
978                 return 0;
979         }
980
981         inode = d_inode(cfile->dentry);
982         cifs_sb = CIFS_SB(inode->i_sb);
983         tcon = tlink_tcon(cfile->tlink);
984         server = tcon->ses->server;
985
986         /*
987          * Can not grab rename sem here because various ops, including those
988          * that already have the rename sem can end up causing writepage to get
989          * called and if the server was down that means we end up here, and we
990          * can never tell if the caller already has the rename_sem.
991          */
992         page = alloc_dentry_path();
993         full_path = build_path_from_dentry(cfile->dentry, page);
994         if (IS_ERR(full_path)) {
995                 mutex_unlock(&cfile->fh_mutex);
996                 free_dentry_path(page);
997                 free_xid(xid);
998                 return PTR_ERR(full_path);
999         }
1000
1001         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002                  inode, cfile->f_flags, full_path);
1003
1004         if (tcon->ses->server->oplocks)
1005                 oplock = REQ_OPLOCK;
1006         else
1007                 oplock = 0;
1008
1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010         if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013                 /*
1014                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015                  * original open. Must mask them off for a reopen.
1016                  */
1017                 unsigned int oflags = cfile->f_flags &
1018                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
1019
1020                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021                                      cifs_sb->ctx->file_mode /* ignored */,
1022                                      oflags, &oplock, &cfile->fid.netfid, xid);
1023                 if (rc == 0) {
1024                         cifs_dbg(FYI, "posix reopen succeeded\n");
1025                         oparms.reconnect = true;
1026                         goto reopen_success;
1027                 }
1028                 /*
1029                  * fallthrough to retry open the old way on errors, especially
1030                  * in the reconnect path it is important to retry hard
1031                  */
1032         }
1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034
1035         /* If we're caching, we need to be able to fill in around partial writes. */
1036         if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037                 rdwr_for_fscache = 1;
1038
1039         desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040
1041         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042         if (cfile->f_flags & O_SYNC)
1043                 create_options |= CREATE_WRITE_THROUGH;
1044
1045         if (cfile->f_flags & O_DIRECT)
1046                 create_options |= CREATE_NO_BUFFER;
1047
1048         if (server->ops->get_lease_key)
1049                 server->ops->get_lease_key(inode, &cfile->fid);
1050
1051 retry_open:
1052         oparms = (struct cifs_open_parms) {
1053                 .tcon = tcon,
1054                 .cifs_sb = cifs_sb,
1055                 .desired_access = desired_access,
1056                 .create_options = cifs_create_options(cifs_sb, create_options),
1057                 .disposition = disposition,
1058                 .path = full_path,
1059                 .fid = &cfile->fid,
1060                 .reconnect = true,
1061         };
1062
1063         /*
1064          * Can not refresh inode by passing in file_info buf to be returned by
1065          * ops->open and then calling get_inode_info with returned buf since
1066          * file might have write behind data that needs to be flushed and server
1067          * version of file size can be stale. If we knew for sure that inode was
1068          * not dirty locally we could do this.
1069          */
1070         rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071         if (rc == -ENOENT && oparms.reconnect == false) {
1072                 /* durable handle timeout is expired - open the file again */
1073                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074                 /* indicate that we need to relock the file */
1075                 oparms.reconnect = true;
1076         }
1077         if (rc == -EACCES && rdwr_for_fscache == 1) {
1078                 desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079                 rdwr_for_fscache = 2;
1080                 goto retry_open;
1081         }
1082
1083         if (rc) {
1084                 mutex_unlock(&cfile->fh_mutex);
1085                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086                 cifs_dbg(FYI, "oplock: %d\n", oplock);
1087                 goto reopen_error_exit;
1088         }
1089
1090         if (rdwr_for_fscache == 2)
1091                 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092
1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094 reopen_success:
1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096         cfile->invalidHandle = false;
1097         mutex_unlock(&cfile->fh_mutex);
1098         cinode = CIFS_I(inode);
1099
1100         if (can_flush) {
1101                 rc = filemap_write_and_wait(inode->i_mapping);
1102                 if (!is_interrupt_error(rc))
1103                         mapping_set_error(inode->i_mapping, rc);
1104
1105                 if (tcon->posix_extensions) {
1106                         rc = smb311_posix_get_inode_info(&inode, full_path,
1107                                                          NULL, inode->i_sb, xid);
1108                 } else if (tcon->unix_ext) {
1109                         rc = cifs_get_inode_info_unix(&inode, full_path,
1110                                                       inode->i_sb, xid);
1111                 } else {
1112                         rc = cifs_get_inode_info(&inode, full_path, NULL,
1113                                                  inode->i_sb, xid, NULL);
1114                 }
1115         }
1116         /*
1117          * Else we are writing out data to server already and could deadlock if
1118          * we tried to flush data, and since we do not know if we have data that
1119          * would invalidate the current end of file on the server we can not go
1120          * to the server to get the new inode info.
1121          */
1122
1123         /*
1124          * If the server returned a read oplock and we have mandatory brlocks,
1125          * set oplock level to None.
1126          */
1127         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129                 oplock = 0;
1130         }
1131
1132         server->ops->set_fid(cfile, &cfile->fid, oplock);
1133         if (oparms.reconnect)
1134                 cifs_relock_file(cfile);
1135
1136 reopen_error_exit:
1137         free_dentry_path(page);
1138         free_xid(xid);
1139         return rc;
1140 }
1141
1142 void smb2_deferred_work_close(struct work_struct *work)
1143 {
1144         struct cifsFileInfo *cfile = container_of(work,
1145                         struct cifsFileInfo, deferred.work);
1146
1147         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148         cifs_del_deferred_close(cfile);
1149         cfile->deferred_close_scheduled = false;
1150         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151         _cifsFileInfo_put(cfile, true, false);
1152 }
1153
1154 static bool
1155 smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1156 {
1157         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1158         struct cifsInodeInfo *cinode = CIFS_I(inode);
1159
1160         return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1161                         (cinode->oplock == CIFS_CACHE_RHW_FLG ||
1162                          cinode->oplock == CIFS_CACHE_RH_FLG) &&
1163                         !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1164
1165 }
1166
1167 int cifs_close(struct inode *inode, struct file *file)
1168 {
1169         struct cifsFileInfo *cfile;
1170         struct cifsInodeInfo *cinode = CIFS_I(inode);
1171         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1172         struct cifs_deferred_close *dclose;
1173
1174         cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1175
1176         if (file->private_data != NULL) {
1177                 cfile = file->private_data;
1178                 file->private_data = NULL;
1179                 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1180                 if ((cfile->status_file_deleted == false) &&
1181                     (smb2_can_defer_close(inode, dclose))) {
1182                         if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1183                                 inode_set_mtime_to_ts(inode,
1184                                                       inode_set_ctime_current(inode));
1185                         }
1186                         spin_lock(&cinode->deferred_lock);
1187                         cifs_add_deferred_close(cfile, dclose);
1188                         if (cfile->deferred_close_scheduled &&
1189                             delayed_work_pending(&cfile->deferred)) {
1190                                 /*
1191                                  * If there is no pending work, mod_delayed_work queues new work.
1192                                  * So, Increase the ref count to avoid use-after-free.
1193                                  */
1194                                 if (!mod_delayed_work(deferredclose_wq,
1195                                                 &cfile->deferred, cifs_sb->ctx->closetimeo))
1196                                         cifsFileInfo_get(cfile);
1197                         } else {
1198                                 /* Deferred close for files */
1199                                 queue_delayed_work(deferredclose_wq,
1200                                                 &cfile->deferred, cifs_sb->ctx->closetimeo);
1201                                 cfile->deferred_close_scheduled = true;
1202                                 spin_unlock(&cinode->deferred_lock);
1203                                 return 0;
1204                         }
1205                         spin_unlock(&cinode->deferred_lock);
1206                         _cifsFileInfo_put(cfile, true, false);
1207                 } else {
1208                         _cifsFileInfo_put(cfile, true, false);
1209                         kfree(dclose);
1210                 }
1211         }
1212
1213         /* return code from the ->release op is always ignored */
1214         return 0;
1215 }
1216
1217 void
1218 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1219 {
1220         struct cifsFileInfo *open_file, *tmp;
1221         struct list_head tmp_list;
1222
1223         if (!tcon->use_persistent || !tcon->need_reopen_files)
1224                 return;
1225
1226         tcon->need_reopen_files = false;
1227
1228         cifs_dbg(FYI, "Reopen persistent handles\n");
1229         INIT_LIST_HEAD(&tmp_list);
1230
1231         /* list all files open on tree connection, reopen resilient handles  */
1232         spin_lock(&tcon->open_file_lock);
1233         list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1234                 if (!open_file->invalidHandle)
1235                         continue;
1236                 cifsFileInfo_get(open_file);
1237                 list_add_tail(&open_file->rlist, &tmp_list);
1238         }
1239         spin_unlock(&tcon->open_file_lock);
1240
1241         list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1242                 if (cifs_reopen_file(open_file, false /* do not flush */))
1243                         tcon->need_reopen_files = true;
1244                 list_del_init(&open_file->rlist);
1245                 cifsFileInfo_put(open_file);
1246         }
1247 }
1248
1249 int cifs_closedir(struct inode *inode, struct file *file)
1250 {
1251         int rc = 0;
1252         unsigned int xid;
1253         struct cifsFileInfo *cfile = file->private_data;
1254         struct cifs_tcon *tcon;
1255         struct TCP_Server_Info *server;
1256         char *buf;
1257
1258         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1259
1260         if (cfile == NULL)
1261                 return rc;
1262
1263         xid = get_xid();
1264         tcon = tlink_tcon(cfile->tlink);
1265         server = tcon->ses->server;
1266
1267         cifs_dbg(FYI, "Freeing private data in close dir\n");
1268         spin_lock(&cfile->file_info_lock);
1269         if (server->ops->dir_needs_close(cfile)) {
1270                 cfile->invalidHandle = true;
1271                 spin_unlock(&cfile->file_info_lock);
1272                 if (server->ops->close_dir)
1273                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1274                 else
1275                         rc = -ENOSYS;
1276                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1277                 /* not much we can do if it fails anyway, ignore rc */
1278                 rc = 0;
1279         } else
1280                 spin_unlock(&cfile->file_info_lock);
1281
1282         buf = cfile->srch_inf.ntwrk_buf_start;
1283         if (buf) {
1284                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1285                 cfile->srch_inf.ntwrk_buf_start = NULL;
1286                 if (cfile->srch_inf.smallBuf)
1287                         cifs_small_buf_release(buf);
1288                 else
1289                         cifs_buf_release(buf);
1290         }
1291
1292         cifs_put_tlink(cfile->tlink);
1293         kfree(file->private_data);
1294         file->private_data = NULL;
1295         /* BB can we lock the filestruct while this is going on? */
1296         free_xid(xid);
1297         return rc;
1298 }
1299
1300 static struct cifsLockInfo *
1301 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1302 {
1303         struct cifsLockInfo *lock =
1304                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1305         if (!lock)
1306                 return lock;
1307         lock->offset = offset;
1308         lock->length = length;
1309         lock->type = type;
1310         lock->pid = current->tgid;
1311         lock->flags = flags;
1312         INIT_LIST_HEAD(&lock->blist);
1313         init_waitqueue_head(&lock->block_q);
1314         return lock;
1315 }
1316
1317 void
1318 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1319 {
1320         struct cifsLockInfo *li, *tmp;
1321         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1322                 list_del_init(&li->blist);
1323                 wake_up(&li->block_q);
1324         }
1325 }
1326
1327 #define CIFS_LOCK_OP    0
1328 #define CIFS_READ_OP    1
1329 #define CIFS_WRITE_OP   2
1330
1331 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1332 static bool
1333 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1334                             __u64 length, __u8 type, __u16 flags,
1335                             struct cifsFileInfo *cfile,
1336                             struct cifsLockInfo **conf_lock, int rw_check)
1337 {
1338         struct cifsLockInfo *li;
1339         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1340         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1341
1342         list_for_each_entry(li, &fdlocks->locks, llist) {
1343                 if (offset + length <= li->offset ||
1344                     offset >= li->offset + li->length)
1345                         continue;
1346                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1347                     server->ops->compare_fids(cfile, cur_cfile)) {
1348                         /* shared lock prevents write op through the same fid */
1349                         if (!(li->type & server->vals->shared_lock_type) ||
1350                             rw_check != CIFS_WRITE_OP)
1351                                 continue;
1352                 }
1353                 if ((type & server->vals->shared_lock_type) &&
1354                     ((server->ops->compare_fids(cfile, cur_cfile) &&
1355                      current->tgid == li->pid) || type == li->type))
1356                         continue;
1357                 if (rw_check == CIFS_LOCK_OP &&
1358                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1359                     server->ops->compare_fids(cfile, cur_cfile))
1360                         continue;
1361                 if (conf_lock)
1362                         *conf_lock = li;
1363                 return true;
1364         }
1365         return false;
1366 }
1367
1368 bool
1369 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1370                         __u8 type, __u16 flags,
1371                         struct cifsLockInfo **conf_lock, int rw_check)
1372 {
1373         bool rc = false;
1374         struct cifs_fid_locks *cur;
1375         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1376
1377         list_for_each_entry(cur, &cinode->llist, llist) {
1378                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1379                                                  flags, cfile, conf_lock,
1380                                                  rw_check);
1381                 if (rc)
1382                         break;
1383         }
1384
1385         return rc;
1386 }
1387
1388 /*
1389  * Check if there is another lock that prevents us to set the lock (mandatory
1390  * style). If such a lock exists, update the flock structure with its
1391  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1392  * or leave it the same if we can't. Returns 0 if we don't need to request to
1393  * the server or 1 otherwise.
1394  */
1395 static int
1396 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1397                __u8 type, struct file_lock *flock)
1398 {
1399         int rc = 0;
1400         struct cifsLockInfo *conf_lock;
1401         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1402         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1403         bool exist;
1404
1405         down_read(&cinode->lock_sem);
1406
1407         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1408                                         flock->c.flc_flags, &conf_lock,
1409                                         CIFS_LOCK_OP);
1410         if (exist) {
1411                 flock->fl_start = conf_lock->offset;
1412                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1413                 flock->c.flc_pid = conf_lock->pid;
1414                 if (conf_lock->type & server->vals->shared_lock_type)
1415                         flock->c.flc_type = F_RDLCK;
1416                 else
1417                         flock->c.flc_type = F_WRLCK;
1418         } else if (!cinode->can_cache_brlcks)
1419                 rc = 1;
1420         else
1421                 flock->c.flc_type = F_UNLCK;
1422
1423         up_read(&cinode->lock_sem);
1424         return rc;
1425 }
1426
1427 static void
1428 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1429 {
1430         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1431         cifs_down_write(&cinode->lock_sem);
1432         list_add_tail(&lock->llist, &cfile->llist->locks);
1433         up_write(&cinode->lock_sem);
1434 }
1435
1436 /*
1437  * Set the byte-range lock (mandatory style). Returns:
1438  * 1) 0, if we set the lock and don't need to request to the server;
1439  * 2) 1, if no locks prevent us but we need to request to the server;
1440  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1441  */
1442 static int
1443 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1444                  bool wait)
1445 {
1446         struct cifsLockInfo *conf_lock;
1447         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1448         bool exist;
1449         int rc = 0;
1450
1451 try_again:
1452         exist = false;
1453         cifs_down_write(&cinode->lock_sem);
1454
1455         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1456                                         lock->type, lock->flags, &conf_lock,
1457                                         CIFS_LOCK_OP);
1458         if (!exist && cinode->can_cache_brlcks) {
1459                 list_add_tail(&lock->llist, &cfile->llist->locks);
1460                 up_write(&cinode->lock_sem);
1461                 return rc;
1462         }
1463
1464         if (!exist)
1465                 rc = 1;
1466         else if (!wait)
1467                 rc = -EACCES;
1468         else {
1469                 list_add_tail(&lock->blist, &conf_lock->blist);
1470                 up_write(&cinode->lock_sem);
1471                 rc = wait_event_interruptible(lock->block_q,
1472                                         (lock->blist.prev == &lock->blist) &&
1473                                         (lock->blist.next == &lock->blist));
1474                 if (!rc)
1475                         goto try_again;
1476                 cifs_down_write(&cinode->lock_sem);
1477                 list_del_init(&lock->blist);
1478         }
1479
1480         up_write(&cinode->lock_sem);
1481         return rc;
1482 }
1483
1484 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1485 /*
1486  * Check if there is another lock that prevents us to set the lock (posix
1487  * style). If such a lock exists, update the flock structure with its
1488  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1489  * or leave it the same if we can't. Returns 0 if we don't need to request to
1490  * the server or 1 otherwise.
1491  */
1492 static int
1493 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1494 {
1495         int rc = 0;
1496         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1497         unsigned char saved_type = flock->c.flc_type;
1498
1499         if ((flock->c.flc_flags & FL_POSIX) == 0)
1500                 return 1;
1501
1502         down_read(&cinode->lock_sem);
1503         posix_test_lock(file, flock);
1504
1505         if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
1506                 flock->c.flc_type = saved_type;
1507                 rc = 1;
1508         }
1509
1510         up_read(&cinode->lock_sem);
1511         return rc;
1512 }
1513
1514 /*
1515  * Set the byte-range lock (posix style). Returns:
1516  * 1) <0, if the error occurs while setting the lock;
1517  * 2) 0, if we set the lock and don't need to request to the server;
1518  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1519  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1520  */
1521 static int
1522 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1523 {
1524         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1525         int rc = FILE_LOCK_DEFERRED + 1;
1526
1527         if ((flock->c.flc_flags & FL_POSIX) == 0)
1528                 return rc;
1529
1530         cifs_down_write(&cinode->lock_sem);
1531         if (!cinode->can_cache_brlcks) {
1532                 up_write(&cinode->lock_sem);
1533                 return rc;
1534         }
1535
1536         rc = posix_lock_file(file, flock, NULL);
1537         up_write(&cinode->lock_sem);
1538         return rc;
1539 }
1540
1541 int
1542 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1543 {
1544         unsigned int xid;
1545         int rc = 0, stored_rc;
1546         struct cifsLockInfo *li, *tmp;
1547         struct cifs_tcon *tcon;
1548         unsigned int num, max_num, max_buf;
1549         LOCKING_ANDX_RANGE *buf, *cur;
1550         static const int types[] = {
1551                 LOCKING_ANDX_LARGE_FILES,
1552                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1553         };
1554         int i;
1555
1556         xid = get_xid();
1557         tcon = tlink_tcon(cfile->tlink);
1558
1559         /*
1560          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1561          * and check it before using.
1562          */
1563         max_buf = tcon->ses->server->maxBuf;
1564         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1565                 free_xid(xid);
1566                 return -EINVAL;
1567         }
1568
1569         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1570                      PAGE_SIZE);
1571         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1572                         PAGE_SIZE);
1573         max_num = (max_buf - sizeof(struct smb_hdr)) /
1574                                                 sizeof(LOCKING_ANDX_RANGE);
1575         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1576         if (!buf) {
1577                 free_xid(xid);
1578                 return -ENOMEM;
1579         }
1580
1581         for (i = 0; i < 2; i++) {
1582                 cur = buf;
1583                 num = 0;
1584                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1585                         if (li->type != types[i])
1586                                 continue;
1587                         cur->Pid = cpu_to_le16(li->pid);
1588                         cur->LengthLow = cpu_to_le32((u32)li->length);
1589                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1590                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1591                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592                         if (++num == max_num) {
1593                                 stored_rc = cifs_lockv(xid, tcon,
1594                                                        cfile->fid.netfid,
1595                                                        (__u8)li->type, 0, num,
1596                                                        buf);
1597                                 if (stored_rc)
1598                                         rc = stored_rc;
1599                                 cur = buf;
1600                                 num = 0;
1601                         } else
1602                                 cur++;
1603                 }
1604
1605                 if (num) {
1606                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1607                                                (__u8)types[i], 0, num, buf);
1608                         if (stored_rc)
1609                                 rc = stored_rc;
1610                 }
1611         }
1612
1613         kfree(buf);
1614         free_xid(xid);
1615         return rc;
1616 }
1617
1618 static __u32
1619 hash_lockowner(fl_owner_t owner)
1620 {
1621         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1622 }
1623 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1624
1625 struct lock_to_push {
1626         struct list_head llist;
1627         __u64 offset;
1628         __u64 length;
1629         __u32 pid;
1630         __u16 netfid;
1631         __u8 type;
1632 };
1633
1634 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1635 static int
1636 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1637 {
1638         struct inode *inode = d_inode(cfile->dentry);
1639         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1640         struct file_lock *flock;
1641         struct file_lock_context *flctx = locks_inode_context(inode);
1642         unsigned int count = 0, i;
1643         int rc = 0, xid, type;
1644         struct list_head locks_to_send, *el;
1645         struct lock_to_push *lck, *tmp;
1646         __u64 length;
1647
1648         xid = get_xid();
1649
1650         if (!flctx)
1651                 goto out;
1652
1653         spin_lock(&flctx->flc_lock);
1654         list_for_each(el, &flctx->flc_posix) {
1655                 count++;
1656         }
1657         spin_unlock(&flctx->flc_lock);
1658
1659         INIT_LIST_HEAD(&locks_to_send);
1660
1661         /*
1662          * Allocating count locks is enough because no FL_POSIX locks can be
1663          * added to the list while we are holding cinode->lock_sem that
1664          * protects locking operations of this inode.
1665          */
1666         for (i = 0; i < count; i++) {
1667                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1668                 if (!lck) {
1669                         rc = -ENOMEM;
1670                         goto err_out;
1671                 }
1672                 list_add_tail(&lck->llist, &locks_to_send);
1673         }
1674
1675         el = locks_to_send.next;
1676         spin_lock(&flctx->flc_lock);
1677         for_each_file_lock(flock, &flctx->flc_posix) {
1678                 unsigned char ftype = flock->c.flc_type;
1679
1680                 if (el == &locks_to_send) {
1681                         /*
1682                          * The list ended. We don't have enough allocated
1683                          * structures - something is really wrong.
1684                          */
1685                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1686                         break;
1687                 }
1688                 length = cifs_flock_len(flock);
1689                 if (ftype == F_RDLCK || ftype == F_SHLCK)
1690                         type = CIFS_RDLCK;
1691                 else
1692                         type = CIFS_WRLCK;
1693                 lck = list_entry(el, struct lock_to_push, llist);
1694                 lck->pid = hash_lockowner(flock->c.flc_owner);
1695                 lck->netfid = cfile->fid.netfid;
1696                 lck->length = length;
1697                 lck->type = type;
1698                 lck->offset = flock->fl_start;
1699         }
1700         spin_unlock(&flctx->flc_lock);
1701
1702         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1703                 int stored_rc;
1704
1705                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1706                                              lck->offset, lck->length, NULL,
1707                                              lck->type, 0);
1708                 if (stored_rc)
1709                         rc = stored_rc;
1710                 list_del(&lck->llist);
1711                 kfree(lck);
1712         }
1713
1714 out:
1715         free_xid(xid);
1716         return rc;
1717 err_out:
1718         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1719                 list_del(&lck->llist);
1720                 kfree(lck);
1721         }
1722         goto out;
1723 }
1724 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1725
1726 static int
1727 cifs_push_locks(struct cifsFileInfo *cfile)
1728 {
1729         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1730         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1731         int rc = 0;
1732 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1733         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1734 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1735
1736         /* we are going to update can_cache_brlcks here - need a write access */
1737         cifs_down_write(&cinode->lock_sem);
1738         if (!cinode->can_cache_brlcks) {
1739                 up_write(&cinode->lock_sem);
1740                 return rc;
1741         }
1742
1743 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1744         if (cap_unix(tcon->ses) &&
1745             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1746             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1747                 rc = cifs_push_posix_locks(cfile);
1748         else
1749 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1750                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1751
1752         cinode->can_cache_brlcks = false;
1753         up_write(&cinode->lock_sem);
1754         return rc;
1755 }
1756
1757 static void
1758 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1759                 bool *wait_flag, struct TCP_Server_Info *server)
1760 {
1761         if (flock->c.flc_flags & FL_POSIX)
1762                 cifs_dbg(FYI, "Posix\n");
1763         if (flock->c.flc_flags & FL_FLOCK)
1764                 cifs_dbg(FYI, "Flock\n");
1765         if (flock->c.flc_flags & FL_SLEEP) {
1766                 cifs_dbg(FYI, "Blocking lock\n");
1767                 *wait_flag = true;
1768         }
1769         if (flock->c.flc_flags & FL_ACCESS)
1770                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1771         if (flock->c.flc_flags & FL_LEASE)
1772                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1773         if (flock->c.flc_flags &
1774             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1775                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1776                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
1777                          flock->c.flc_flags);
1778
1779         *type = server->vals->large_lock_type;
1780         if (lock_is_write(flock)) {
1781                 cifs_dbg(FYI, "F_WRLCK\n");
1782                 *type |= server->vals->exclusive_lock_type;
1783                 *lock = 1;
1784         } else if (lock_is_unlock(flock)) {
1785                 cifs_dbg(FYI, "F_UNLCK\n");
1786                 *type |= server->vals->unlock_lock_type;
1787                 *unlock = 1;
1788                 /* Check if unlock includes more than one lock range */
1789         } else if (lock_is_read(flock)) {
1790                 cifs_dbg(FYI, "F_RDLCK\n");
1791                 *type |= server->vals->shared_lock_type;
1792                 *lock = 1;
1793         } else if (flock->c.flc_type == F_EXLCK) {
1794                 cifs_dbg(FYI, "F_EXLCK\n");
1795                 *type |= server->vals->exclusive_lock_type;
1796                 *lock = 1;
1797         } else if (flock->c.flc_type == F_SHLCK) {
1798                 cifs_dbg(FYI, "F_SHLCK\n");
1799                 *type |= server->vals->shared_lock_type;
1800                 *lock = 1;
1801         } else
1802                 cifs_dbg(FYI, "Unknown type of lock\n");
1803 }
1804
1805 static int
1806 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1807            bool wait_flag, bool posix_lck, unsigned int xid)
1808 {
1809         int rc = 0;
1810         __u64 length = cifs_flock_len(flock);
1811         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1812         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1813         struct TCP_Server_Info *server = tcon->ses->server;
1814 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1815         __u16 netfid = cfile->fid.netfid;
1816
1817         if (posix_lck) {
1818                 int posix_lock_type;
1819
1820                 rc = cifs_posix_lock_test(file, flock);
1821                 if (!rc)
1822                         return rc;
1823
1824                 if (type & server->vals->shared_lock_type)
1825                         posix_lock_type = CIFS_RDLCK;
1826                 else
1827                         posix_lock_type = CIFS_WRLCK;
1828                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1829                                       hash_lockowner(flock->c.flc_owner),
1830                                       flock->fl_start, length, flock,
1831                                       posix_lock_type, wait_flag);
1832                 return rc;
1833         }
1834 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1835
1836         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1837         if (!rc)
1838                 return rc;
1839
1840         /* BB we could chain these into one lock request BB */
1841         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1842                                     1, 0, false);
1843         if (rc == 0) {
1844                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1845                                             type, 0, 1, false);
1846                 flock->c.flc_type = F_UNLCK;
1847                 if (rc != 0)
1848                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1849                                  rc);
1850                 return 0;
1851         }
1852
1853         if (type & server->vals->shared_lock_type) {
1854                 flock->c.flc_type = F_WRLCK;
1855                 return 0;
1856         }
1857
1858         type &= ~server->vals->exclusive_lock_type;
1859
1860         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1861                                     type | server->vals->shared_lock_type,
1862                                     1, 0, false);
1863         if (rc == 0) {
1864                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1865                         type | server->vals->shared_lock_type, 0, 1, false);
1866                 flock->c.flc_type = F_RDLCK;
1867                 if (rc != 0)
1868                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1869                                  rc);
1870         } else
1871                 flock->c.flc_type = F_WRLCK;
1872
1873         return 0;
1874 }
1875
1876 void
1877 cifs_move_llist(struct list_head *source, struct list_head *dest)
1878 {
1879         struct list_head *li, *tmp;
1880         list_for_each_safe(li, tmp, source)
1881                 list_move(li, dest);
1882 }
1883
1884 void
1885 cifs_free_llist(struct list_head *llist)
1886 {
1887         struct cifsLockInfo *li, *tmp;
1888         list_for_each_entry_safe(li, tmp, llist, llist) {
1889                 cifs_del_lock_waiters(li);
1890                 list_del(&li->llist);
1891                 kfree(li);
1892         }
1893 }
1894
1895 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1896 int
1897 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1898                   unsigned int xid)
1899 {
1900         int rc = 0, stored_rc;
1901         static const int types[] = {
1902                 LOCKING_ANDX_LARGE_FILES,
1903                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1904         };
1905         unsigned int i;
1906         unsigned int max_num, num, max_buf;
1907         LOCKING_ANDX_RANGE *buf, *cur;
1908         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1909         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1910         struct cifsLockInfo *li, *tmp;
1911         __u64 length = cifs_flock_len(flock);
1912         struct list_head tmp_llist;
1913
1914         INIT_LIST_HEAD(&tmp_llist);
1915
1916         /*
1917          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1918          * and check it before using.
1919          */
1920         max_buf = tcon->ses->server->maxBuf;
1921         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1922                 return -EINVAL;
1923
1924         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1925                      PAGE_SIZE);
1926         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1927                         PAGE_SIZE);
1928         max_num = (max_buf - sizeof(struct smb_hdr)) /
1929                                                 sizeof(LOCKING_ANDX_RANGE);
1930         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1931         if (!buf)
1932                 return -ENOMEM;
1933
1934         cifs_down_write(&cinode->lock_sem);
1935         for (i = 0; i < 2; i++) {
1936                 cur = buf;
1937                 num = 0;
1938                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1939                         if (flock->fl_start > li->offset ||
1940                             (flock->fl_start + length) <
1941                             (li->offset + li->length))
1942                                 continue;
1943                         if (current->tgid != li->pid)
1944                                 continue;
1945                         if (types[i] != li->type)
1946                                 continue;
1947                         if (cinode->can_cache_brlcks) {
1948                                 /*
1949                                  * We can cache brlock requests - simply remove
1950                                  * a lock from the file's list.
1951                                  */
1952                                 list_del(&li->llist);
1953                                 cifs_del_lock_waiters(li);
1954                                 kfree(li);
1955                                 continue;
1956                         }
1957                         cur->Pid = cpu_to_le16(li->pid);
1958                         cur->LengthLow = cpu_to_le32((u32)li->length);
1959                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1960                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1961                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1962                         /*
1963                          * We need to save a lock here to let us add it again to
1964                          * the file's list if the unlock range request fails on
1965                          * the server.
1966                          */
1967                         list_move(&li->llist, &tmp_llist);
1968                         if (++num == max_num) {
1969                                 stored_rc = cifs_lockv(xid, tcon,
1970                                                        cfile->fid.netfid,
1971                                                        li->type, num, 0, buf);
1972                                 if (stored_rc) {
1973                                         /*
1974                                          * We failed on the unlock range
1975                                          * request - add all locks from the tmp
1976                                          * list to the head of the file's list.
1977                                          */
1978                                         cifs_move_llist(&tmp_llist,
1979                                                         &cfile->llist->locks);
1980                                         rc = stored_rc;
1981                                 } else
1982                                         /*
1983                                          * The unlock range request succeed -
1984                                          * free the tmp list.
1985                                          */
1986                                         cifs_free_llist(&tmp_llist);
1987                                 cur = buf;
1988                                 num = 0;
1989                         } else
1990                                 cur++;
1991                 }
1992                 if (num) {
1993                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1994                                                types[i], num, 0, buf);
1995                         if (stored_rc) {
1996                                 cifs_move_llist(&tmp_llist,
1997                                                 &cfile->llist->locks);
1998                                 rc = stored_rc;
1999                         } else
2000                                 cifs_free_llist(&tmp_llist);
2001                 }
2002         }
2003
2004         up_write(&cinode->lock_sem);
2005         kfree(buf);
2006         return rc;
2007 }
2008 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2009
2010 static int
2011 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
2012            bool wait_flag, bool posix_lck, int lock, int unlock,
2013            unsigned int xid)
2014 {
2015         int rc = 0;
2016         __u64 length = cifs_flock_len(flock);
2017         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2018         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2019         struct TCP_Server_Info *server = tcon->ses->server;
2020         struct inode *inode = d_inode(cfile->dentry);
2021
2022 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2023         if (posix_lck) {
2024                 int posix_lock_type;
2025
2026                 rc = cifs_posix_lock_set(file, flock);
2027                 if (rc <= FILE_LOCK_DEFERRED)
2028                         return rc;
2029
2030                 if (type & server->vals->shared_lock_type)
2031                         posix_lock_type = CIFS_RDLCK;
2032                 else
2033                         posix_lock_type = CIFS_WRLCK;
2034
2035                 if (unlock == 1)
2036                         posix_lock_type = CIFS_UNLCK;
2037
2038                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2039                                       hash_lockowner(flock->c.flc_owner),
2040                                       flock->fl_start, length,
2041                                       NULL, posix_lock_type, wait_flag);
2042                 goto out;
2043         }
2044 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2045         if (lock) {
2046                 struct cifsLockInfo *lock;
2047
2048                 lock = cifs_lock_init(flock->fl_start, length, type,
2049                                       flock->c.flc_flags);
2050                 if (!lock)
2051                         return -ENOMEM;
2052
2053                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
2054                 if (rc < 0) {
2055                         kfree(lock);
2056                         return rc;
2057                 }
2058                 if (!rc)
2059                         goto out;
2060
2061                 /*
2062                  * Windows 7 server can delay breaking lease from read to None
2063                  * if we set a byte-range lock on a file - break it explicitly
2064                  * before sending the lock to the server to be sure the next
2065                  * read won't conflict with non-overlapted locks due to
2066                  * pagereading.
2067                  */
2068                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2069                                         CIFS_CACHE_READ(CIFS_I(inode))) {
2070                         cifs_zap_mapping(inode);
2071                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2072                                  inode);
2073                         CIFS_I(inode)->oplock = 0;
2074                 }
2075
2076                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2077                                             type, 1, 0, wait_flag);
2078                 if (rc) {
2079                         kfree(lock);
2080                         return rc;
2081                 }
2082
2083                 cifs_lock_add(cfile, lock);
2084         } else if (unlock)
2085                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
2086
2087 out:
2088         if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
2089                 /*
2090                  * If this is a request to remove all locks because we
2091                  * are closing the file, it doesn't matter if the
2092                  * unlocking failed as both cifs.ko and the SMB server
2093                  * remove the lock on file close
2094                  */
2095                 if (rc) {
2096                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2097                         if (!(flock->c.flc_flags & FL_CLOSE))
2098                                 return rc;
2099                 }
2100                 rc = locks_lock_file_wait(file, flock);
2101         }
2102         return rc;
2103 }
2104
2105 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2106 {
2107         int rc, xid;
2108         int lock = 0, unlock = 0;
2109         bool wait_flag = false;
2110         bool posix_lck = false;
2111         struct cifs_sb_info *cifs_sb;
2112         struct cifs_tcon *tcon;
2113         struct cifsFileInfo *cfile;
2114         __u32 type;
2115
2116         xid = get_xid();
2117
2118         if (!(fl->c.flc_flags & FL_FLOCK)) {
2119                 rc = -ENOLCK;
2120                 free_xid(xid);
2121                 return rc;
2122         }
2123
2124         cfile = (struct cifsFileInfo *)file->private_data;
2125         tcon = tlink_tcon(cfile->tlink);
2126
2127         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2128                         tcon->ses->server);
2129         cifs_sb = CIFS_FILE_SB(file);
2130
2131         if (cap_unix(tcon->ses) &&
2132             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2133             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2134                 posix_lck = true;
2135
2136         if (!lock && !unlock) {
2137                 /*
2138                  * if no lock or unlock then nothing to do since we do not
2139                  * know what it is
2140                  */
2141                 rc = -EOPNOTSUPP;
2142                 free_xid(xid);
2143                 return rc;
2144         }
2145
2146         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2147                         xid);
2148         free_xid(xid);
2149         return rc;
2150
2151
2152 }
2153
2154 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2155 {
2156         int rc, xid;
2157         int lock = 0, unlock = 0;
2158         bool wait_flag = false;
2159         bool posix_lck = false;
2160         struct cifs_sb_info *cifs_sb;
2161         struct cifs_tcon *tcon;
2162         struct cifsFileInfo *cfile;
2163         __u32 type;
2164
2165         rc = -EACCES;
2166         xid = get_xid();
2167
2168         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2169                  flock->c.flc_flags, flock->c.flc_type,
2170                  (long long)flock->fl_start,
2171                  (long long)flock->fl_end);
2172
2173         cfile = (struct cifsFileInfo *)file->private_data;
2174         tcon = tlink_tcon(cfile->tlink);
2175
2176         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2177                         tcon->ses->server);
2178         cifs_sb = CIFS_FILE_SB(file);
2179         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2180
2181         if (cap_unix(tcon->ses) &&
2182             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2183             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2184                 posix_lck = true;
2185         /*
2186          * BB add code here to normalize offset and length to account for
2187          * negative length which we can not accept over the wire.
2188          */
2189         if (IS_GETLK(cmd)) {
2190                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2191                 free_xid(xid);
2192                 return rc;
2193         }
2194
2195         if (!lock && !unlock) {
2196                 /*
2197                  * if no lock or unlock then nothing to do since we do not
2198                  * know what it is
2199                  */
2200                 free_xid(xid);
2201                 return -EOPNOTSUPP;
2202         }
2203
2204         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2205                         xid);
2206         free_xid(xid);
2207         return rc;
2208 }
2209
2210 /*
2211  * update the file size (if needed) after a write. Should be called with
2212  * the inode->i_lock held
2213  */
2214 void
2215 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2216                       unsigned int bytes_written)
2217 {
2218         loff_t end_of_write = offset + bytes_written;
2219
2220         if (end_of_write > cifsi->netfs.remote_i_size)
2221                 netfs_resize_file(&cifsi->netfs, end_of_write, true);
2222 }
2223
2224 static ssize_t
2225 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2226            size_t write_size, loff_t *offset)
2227 {
2228         int rc = 0;
2229         unsigned int bytes_written = 0;
2230         unsigned int total_written;
2231         struct cifs_tcon *tcon;
2232         struct TCP_Server_Info *server;
2233         unsigned int xid;
2234         struct dentry *dentry = open_file->dentry;
2235         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2236         struct cifs_io_parms io_parms = {0};
2237
2238         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2239                  write_size, *offset, dentry);
2240
2241         tcon = tlink_tcon(open_file->tlink);
2242         server = tcon->ses->server;
2243
2244         if (!server->ops->sync_write)
2245                 return -ENOSYS;
2246
2247         xid = get_xid();
2248
2249         for (total_written = 0; write_size > total_written;
2250              total_written += bytes_written) {
2251                 rc = -EAGAIN;
2252                 while (rc == -EAGAIN) {
2253                         struct kvec iov[2];
2254                         unsigned int len;
2255
2256                         if (open_file->invalidHandle) {
2257                                 /* we could deadlock if we called
2258                                    filemap_fdatawait from here so tell
2259                                    reopen_file not to flush data to
2260                                    server now */
2261                                 rc = cifs_reopen_file(open_file, false);
2262                                 if (rc != 0)
2263                                         break;
2264                         }
2265
2266                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2267                                   (unsigned int)write_size - total_written);
2268                         /* iov[0] is reserved for smb header */
2269                         iov[1].iov_base = (char *)write_data + total_written;
2270                         iov[1].iov_len = len;
2271                         io_parms.pid = pid;
2272                         io_parms.tcon = tcon;
2273                         io_parms.offset = *offset;
2274                         io_parms.length = len;
2275                         rc = server->ops->sync_write(xid, &open_file->fid,
2276                                         &io_parms, &bytes_written, iov, 1);
2277                 }
2278                 if (rc || (bytes_written == 0)) {
2279                         if (total_written)
2280                                 break;
2281                         else {
2282                                 free_xid(xid);
2283                                 return rc;
2284                         }
2285                 } else {
2286                         spin_lock(&d_inode(dentry)->i_lock);
2287                         cifs_update_eof(cifsi, *offset, bytes_written);
2288                         spin_unlock(&d_inode(dentry)->i_lock);
2289                         *offset += bytes_written;
2290                 }
2291         }
2292
2293         cifs_stats_bytes_written(tcon, total_written);
2294
2295         if (total_written > 0) {
2296                 spin_lock(&d_inode(dentry)->i_lock);
2297                 if (*offset > d_inode(dentry)->i_size) {
2298                         i_size_write(d_inode(dentry), *offset);
2299                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2300                 }
2301                 spin_unlock(&d_inode(dentry)->i_lock);
2302         }
2303         mark_inode_dirty_sync(d_inode(dentry));
2304         free_xid(xid);
2305         return total_written;
2306 }
2307
2308 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2309                                         bool fsuid_only)
2310 {
2311         struct cifsFileInfo *open_file = NULL;
2312         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2313
2314         /* only filter by fsuid on multiuser mounts */
2315         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2316                 fsuid_only = false;
2317
2318         spin_lock(&cifs_inode->open_file_lock);
2319         /* we could simply get the first_list_entry since write-only entries
2320            are always at the end of the list but since the first entry might
2321            have a close pending, we go through the whole list */
2322         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2323                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2324                         continue;
2325                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2326                         if ((!open_file->invalidHandle)) {
2327                                 /* found a good file */
2328                                 /* lock it so it will not be closed on us */
2329                                 cifsFileInfo_get(open_file);
2330                                 spin_unlock(&cifs_inode->open_file_lock);
2331                                 return open_file;
2332                         } /* else might as well continue, and look for
2333                              another, or simply have the caller reopen it
2334                              again rather than trying to fix this handle */
2335                 } else /* write only file */
2336                         break; /* write only files are last so must be done */
2337         }
2338         spin_unlock(&cifs_inode->open_file_lock);
2339         return NULL;
2340 }
2341
2342 /* Return -EBADF if no handle is found and general rc otherwise */
2343 int
2344 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2345                        struct cifsFileInfo **ret_file)
2346 {
2347         struct cifsFileInfo *open_file, *inv_file = NULL;
2348         struct cifs_sb_info *cifs_sb;
2349         bool any_available = false;
2350         int rc = -EBADF;
2351         unsigned int refind = 0;
2352         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2353         bool with_delete = flags & FIND_WR_WITH_DELETE;
2354         *ret_file = NULL;
2355
2356         /*
2357          * Having a null inode here (because mapping->host was set to zero by
2358          * the VFS or MM) should not happen but we had reports of on oops (due
2359          * to it being zero) during stress testcases so we need to check for it
2360          */
2361
2362         if (cifs_inode == NULL) {
2363                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2364                 dump_stack();
2365                 return rc;
2366         }
2367
2368         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2369
2370         /* only filter by fsuid on multiuser mounts */
2371         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2372                 fsuid_only = false;
2373
2374         spin_lock(&cifs_inode->open_file_lock);
2375 refind_writable:
2376         if (refind > MAX_REOPEN_ATT) {
2377                 spin_unlock(&cifs_inode->open_file_lock);
2378                 return rc;
2379         }
2380         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2381                 if (!any_available && open_file->pid != current->tgid)
2382                         continue;
2383                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2384                         continue;
2385                 if (with_delete && !(open_file->fid.access & DELETE))
2386                         continue;
2387                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2388                         if (!open_file->invalidHandle) {
2389                                 /* found a good writable file */
2390                                 cifsFileInfo_get(open_file);
2391                                 spin_unlock(&cifs_inode->open_file_lock);
2392                                 *ret_file = open_file;
2393                                 return 0;
2394                         } else {
2395                                 if (!inv_file)
2396                                         inv_file = open_file;
2397                         }
2398                 }
2399         }
2400         /* couldn't find useable FH with same pid, try any available */
2401         if (!any_available) {
2402                 any_available = true;
2403                 goto refind_writable;
2404         }
2405
2406         if (inv_file) {
2407                 any_available = false;
2408                 cifsFileInfo_get(inv_file);
2409         }
2410
2411         spin_unlock(&cifs_inode->open_file_lock);
2412
2413         if (inv_file) {
2414                 rc = cifs_reopen_file(inv_file, false);
2415                 if (!rc) {
2416                         *ret_file = inv_file;
2417                         return 0;
2418                 }
2419
2420                 spin_lock(&cifs_inode->open_file_lock);
2421                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2422                 spin_unlock(&cifs_inode->open_file_lock);
2423                 cifsFileInfo_put(inv_file);
2424                 ++refind;
2425                 inv_file = NULL;
2426                 spin_lock(&cifs_inode->open_file_lock);
2427                 goto refind_writable;
2428         }
2429
2430         return rc;
2431 }
2432
2433 struct cifsFileInfo *
2434 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2435 {
2436         struct cifsFileInfo *cfile;
2437         int rc;
2438
2439         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2440         if (rc)
2441                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2442
2443         return cfile;
2444 }
2445
2446 int
2447 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2448                        int flags,
2449                        struct cifsFileInfo **ret_file)
2450 {
2451         struct cifsFileInfo *cfile;
2452         void *page = alloc_dentry_path();
2453
2454         *ret_file = NULL;
2455
2456         spin_lock(&tcon->open_file_lock);
2457         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2458                 struct cifsInodeInfo *cinode;
2459                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2460                 if (IS_ERR(full_path)) {
2461                         spin_unlock(&tcon->open_file_lock);
2462                         free_dentry_path(page);
2463                         return PTR_ERR(full_path);
2464                 }
2465                 if (strcmp(full_path, name))
2466                         continue;
2467
2468                 cinode = CIFS_I(d_inode(cfile->dentry));
2469                 spin_unlock(&tcon->open_file_lock);
2470                 free_dentry_path(page);
2471                 return cifs_get_writable_file(cinode, flags, ret_file);
2472         }
2473
2474         spin_unlock(&tcon->open_file_lock);
2475         free_dentry_path(page);
2476         return -ENOENT;
2477 }
2478
2479 int
2480 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2481                        struct cifsFileInfo **ret_file)
2482 {
2483         struct cifsFileInfo *cfile;
2484         void *page = alloc_dentry_path();
2485
2486         *ret_file = NULL;
2487
2488         spin_lock(&tcon->open_file_lock);
2489         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2490                 struct cifsInodeInfo *cinode;
2491                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2492                 if (IS_ERR(full_path)) {
2493                         spin_unlock(&tcon->open_file_lock);
2494                         free_dentry_path(page);
2495                         return PTR_ERR(full_path);
2496                 }
2497                 if (strcmp(full_path, name))
2498                         continue;
2499
2500                 cinode = CIFS_I(d_inode(cfile->dentry));
2501                 spin_unlock(&tcon->open_file_lock);
2502                 free_dentry_path(page);
2503                 *ret_file = find_readable_file(cinode, 0);
2504                 return *ret_file ? 0 : -ENOENT;
2505         }
2506
2507         spin_unlock(&tcon->open_file_lock);
2508         free_dentry_path(page);
2509         return -ENOENT;
2510 }
2511
2512 void
2513 cifs_writedata_release(struct kref *refcount)
2514 {
2515         struct cifs_writedata *wdata = container_of(refcount,
2516                                         struct cifs_writedata, refcount);
2517 #ifdef CONFIG_CIFS_SMB_DIRECT
2518         if (wdata->mr) {
2519                 smbd_deregister_mr(wdata->mr);
2520                 wdata->mr = NULL;
2521         }
2522 #endif
2523
2524         if (wdata->cfile)
2525                 cifsFileInfo_put(wdata->cfile);
2526
2527         kfree(wdata);
2528 }
2529
2530 /*
2531  * Write failed with a retryable error. Resend the write request. It's also
2532  * possible that the page was redirtied so re-clean the page.
2533  */
2534 static void
2535 cifs_writev_requeue(struct cifs_writedata *wdata)
2536 {
2537         int rc = 0;
2538         struct inode *inode = d_inode(wdata->cfile->dentry);
2539         struct TCP_Server_Info *server;
2540         unsigned int rest_len = wdata->bytes;
2541         loff_t fpos = wdata->offset;
2542
2543         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2544         do {
2545                 struct cifs_writedata *wdata2;
2546                 unsigned int wsize, cur_len;
2547
2548                 wsize = server->ops->wp_retry_size(inode);
2549                 if (wsize < rest_len) {
2550                         if (wsize < PAGE_SIZE) {
2551                                 rc = -EOPNOTSUPP;
2552                                 break;
2553                         }
2554                         cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2555                 } else {
2556                         cur_len = rest_len;
2557                 }
2558
2559                 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2560                 if (!wdata2) {
2561                         rc = -ENOMEM;
2562                         break;
2563                 }
2564
2565                 wdata2->sync_mode = wdata->sync_mode;
2566                 wdata2->offset  = fpos;
2567                 wdata2->bytes   = cur_len;
2568                 wdata2->iter    = wdata->iter;
2569
2570                 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2571                 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2572
2573                 if (iov_iter_is_xarray(&wdata2->iter))
2574                         /* Check for pages having been redirtied and clean
2575                          * them.  We can do this by walking the xarray.  If
2576                          * it's not an xarray, then it's a DIO and we shouldn't
2577                          * be mucking around with the page bits.
2578                          */
2579                         cifs_undirty_folios(inode, fpos, cur_len);
2580
2581                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2582                                             &wdata2->cfile);
2583                 if (!wdata2->cfile) {
2584                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2585                                  rc);
2586                         if (!is_retryable_error(rc))
2587                                 rc = -EBADF;
2588                 } else {
2589                         wdata2->pid = wdata2->cfile->pid;
2590                         rc = server->ops->async_writev(wdata2,
2591                                                        cifs_writedata_release);
2592                 }
2593
2594                 kref_put(&wdata2->refcount, cifs_writedata_release);
2595                 if (rc) {
2596                         if (is_retryable_error(rc))
2597                                 continue;
2598                         fpos += cur_len;
2599                         rest_len -= cur_len;
2600                         break;
2601                 }
2602
2603                 fpos += cur_len;
2604                 rest_len -= cur_len;
2605         } while (rest_len > 0);
2606
2607         /* Clean up remaining pages from the original wdata */
2608         if (iov_iter_is_xarray(&wdata->iter))
2609                 cifs_pages_write_failed(inode, fpos, rest_len);
2610
2611         if (rc != 0 && !is_retryable_error(rc))
2612                 mapping_set_error(inode->i_mapping, rc);
2613         kref_put(&wdata->refcount, cifs_writedata_release);
2614 }
2615
2616 void
2617 cifs_writev_complete(struct work_struct *work)
2618 {
2619         struct cifs_writedata *wdata = container_of(work,
2620                                                 struct cifs_writedata, work);
2621         struct inode *inode = d_inode(wdata->cfile->dentry);
2622
2623         if (wdata->result == 0) {
2624                 spin_lock(&inode->i_lock);
2625                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2626                 spin_unlock(&inode->i_lock);
2627                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2628                                          wdata->bytes);
2629         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2630                 return cifs_writev_requeue(wdata);
2631
2632         if (wdata->result == -EAGAIN)
2633                 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2634         else if (wdata->result < 0)
2635                 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2636         else
2637                 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2638
2639         if (wdata->result != -EAGAIN)
2640                 mapping_set_error(inode->i_mapping, wdata->result);
2641         kref_put(&wdata->refcount, cifs_writedata_release);
2642 }
2643
2644 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2645 {
2646         struct cifs_writedata *wdata;
2647
2648         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2649         if (wdata != NULL) {
2650                 kref_init(&wdata->refcount);
2651                 INIT_LIST_HEAD(&wdata->list);
2652                 init_completion(&wdata->done);
2653                 INIT_WORK(&wdata->work, complete);
2654         }
2655         return wdata;
2656 }
2657
2658 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2659 {
2660         struct address_space *mapping = page->mapping;
2661         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2662         char *write_data;
2663         int rc = -EFAULT;
2664         int bytes_written = 0;
2665         struct inode *inode;
2666         struct cifsFileInfo *open_file;
2667
2668         if (!mapping || !mapping->host)
2669                 return -EFAULT;
2670
2671         inode = page->mapping->host;
2672
2673         offset += (loff_t)from;
2674         write_data = kmap(page);
2675         write_data += from;
2676
2677         if ((to > PAGE_SIZE) || (from > to)) {
2678                 kunmap(page);
2679                 return -EIO;
2680         }
2681
2682         /* racing with truncate? */
2683         if (offset > mapping->host->i_size) {
2684                 kunmap(page);
2685                 return 0; /* don't care */
2686         }
2687
2688         /* check to make sure that we are not extending the file */
2689         if (mapping->host->i_size - offset < (loff_t)to)
2690                 to = (unsigned)(mapping->host->i_size - offset);
2691
2692         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2693                                     &open_file);
2694         if (!rc) {
2695                 bytes_written = cifs_write(open_file, open_file->pid,
2696                                            write_data, to - from, &offset);
2697                 cifsFileInfo_put(open_file);
2698                 /* Does mm or vfs already set times? */
2699                 simple_inode_init_ts(inode);
2700                 if ((bytes_written > 0) && (offset))
2701                         rc = 0;
2702                 else if (bytes_written < 0)
2703                         rc = bytes_written;
2704                 else
2705                         rc = -EFAULT;
2706         } else {
2707                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2708                 if (!is_retryable_error(rc))
2709                         rc = -EIO;
2710         }
2711
2712         kunmap(page);
2713         return rc;
2714 }
2715
2716 /*
2717  * Extend the region to be written back to include subsequent contiguously
2718  * dirty pages if possible, but don't sleep while doing so.
2719  */
2720 static void cifs_extend_writeback(struct address_space *mapping,
2721                                   struct xa_state *xas,
2722                                   long *_count,
2723                                   loff_t start,
2724                                   int max_pages,
2725                                   loff_t max_len,
2726                                   size_t *_len)
2727 {
2728         struct folio_batch batch;
2729         struct folio *folio;
2730         unsigned int nr_pages;
2731         pgoff_t index = (start + *_len) / PAGE_SIZE;
2732         size_t len;
2733         bool stop = true;
2734         unsigned int i;
2735
2736         folio_batch_init(&batch);
2737
2738         do {
2739                 /* Firstly, we gather up a batch of contiguous dirty pages
2740                  * under the RCU read lock - but we can't clear the dirty flags
2741                  * there if any of those pages are mapped.
2742                  */
2743                 rcu_read_lock();
2744
2745                 xas_for_each(xas, folio, ULONG_MAX) {
2746                         stop = true;
2747                         if (xas_retry(xas, folio))
2748                                 continue;
2749                         if (xa_is_value(folio))
2750                                 break;
2751                         if (folio->index != index) {
2752                                 xas_reset(xas);
2753                                 break;
2754                         }
2755
2756                         if (!folio_try_get_rcu(folio)) {
2757                                 xas_reset(xas);
2758                                 continue;
2759                         }
2760                         nr_pages = folio_nr_pages(folio);
2761                         if (nr_pages > max_pages) {
2762                                 xas_reset(xas);
2763                                 break;
2764                         }
2765
2766                         /* Has the page moved or been split? */
2767                         if (unlikely(folio != xas_reload(xas))) {
2768                                 folio_put(folio);
2769                                 xas_reset(xas);
2770                                 break;
2771                         }
2772
2773                         if (!folio_trylock(folio)) {
2774                                 folio_put(folio);
2775                                 xas_reset(xas);
2776                                 break;
2777                         }
2778                         if (!folio_test_dirty(folio) ||
2779                             folio_test_writeback(folio)) {
2780                                 folio_unlock(folio);
2781                                 folio_put(folio);
2782                                 xas_reset(xas);
2783                                 break;
2784                         }
2785
2786                         max_pages -= nr_pages;
2787                         len = folio_size(folio);
2788                         stop = false;
2789
2790                         index += nr_pages;
2791                         *_count -= nr_pages;
2792                         *_len += len;
2793                         if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2794                                 stop = true;
2795
2796                         if (!folio_batch_add(&batch, folio))
2797                                 break;
2798                         if (stop)
2799                                 break;
2800                 }
2801
2802                 xas_pause(xas);
2803                 rcu_read_unlock();
2804
2805                 /* Now, if we obtained any pages, we can shift them to being
2806                  * writable and mark them for caching.
2807                  */
2808                 if (!folio_batch_count(&batch))
2809                         break;
2810
2811                 for (i = 0; i < folio_batch_count(&batch); i++) {
2812                         folio = batch.folios[i];
2813                         /* The folio should be locked, dirty and not undergoing
2814                          * writeback from the loop above.
2815                          */
2816                         if (!folio_clear_dirty_for_io(folio))
2817                                 WARN_ON(1);
2818                         folio_start_writeback(folio);
2819                         folio_unlock(folio);
2820                 }
2821
2822                 folio_batch_release(&batch);
2823                 cond_resched();
2824         } while (!stop);
2825 }
2826
2827 /*
2828  * Write back the locked page and any subsequent non-locked dirty pages.
2829  */
2830 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2831                                                  struct writeback_control *wbc,
2832                                                  struct xa_state *xas,
2833                                                  struct folio *folio,
2834                                                  unsigned long long start,
2835                                                  unsigned long long end)
2836 {
2837         struct inode *inode = mapping->host;
2838         struct TCP_Server_Info *server;
2839         struct cifs_writedata *wdata;
2840         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2841         struct cifs_credits credits_on_stack;
2842         struct cifs_credits *credits = &credits_on_stack;
2843         struct cifsFileInfo *cfile = NULL;
2844         unsigned long long i_size = i_size_read(inode), max_len;
2845         unsigned int xid, wsize;
2846         size_t len = folio_size(folio);
2847         long count = wbc->nr_to_write;
2848         int rc;
2849
2850         /* The folio should be locked, dirty and not undergoing writeback. */
2851         if (!folio_clear_dirty_for_io(folio))
2852                 WARN_ON_ONCE(1);
2853         folio_start_writeback(folio);
2854
2855         count -= folio_nr_pages(folio);
2856
2857         xid = get_xid();
2858         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2859
2860         rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2861         if (rc) {
2862                 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2863                 goto err_xid;
2864         }
2865
2866         rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2867                                            &wsize, credits);
2868         if (rc != 0)
2869                 goto err_close;
2870
2871         wdata = cifs_writedata_alloc(cifs_writev_complete);
2872         if (!wdata) {
2873                 rc = -ENOMEM;
2874                 goto err_uncredit;
2875         }
2876
2877         wdata->sync_mode = wbc->sync_mode;
2878         wdata->offset = folio_pos(folio);
2879         wdata->pid = cfile->pid;
2880         wdata->credits = credits_on_stack;
2881         wdata->cfile = cfile;
2882         wdata->server = server;
2883         cfile = NULL;
2884
2885         /* Find all consecutive lockable dirty pages that have contiguous
2886          * written regions, stopping when we find a page that is not
2887          * immediately lockable, is not dirty or is missing, or we reach the
2888          * end of the range.
2889          */
2890         if (start < i_size) {
2891                 /* Trim the write to the EOF; the extra data is ignored.  Also
2892                  * put an upper limit on the size of a single storedata op.
2893                  */
2894                 max_len = wsize;
2895                 max_len = min_t(unsigned long long, max_len, end - start + 1);
2896                 max_len = min_t(unsigned long long, max_len, i_size - start);
2897
2898                 if (len < max_len) {
2899                         int max_pages = INT_MAX;
2900
2901 #ifdef CONFIG_CIFS_SMB_DIRECT
2902                         if (server->smbd_conn)
2903                                 max_pages = server->smbd_conn->max_frmr_depth;
2904 #endif
2905                         max_pages -= folio_nr_pages(folio);
2906
2907                         if (max_pages > 0)
2908                                 cifs_extend_writeback(mapping, xas, &count, start,
2909                                                       max_pages, max_len, &len);
2910                 }
2911         }
2912         len = min_t(unsigned long long, len, i_size - start);
2913
2914         /* We now have a contiguous set of dirty pages, each with writeback
2915          * set; the first page is still locked at this point, but all the rest
2916          * have been unlocked.
2917          */
2918         folio_unlock(folio);
2919         wdata->bytes = len;
2920
2921         if (start < i_size) {
2922                 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2923                                 start, len);
2924
2925                 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2926                 if (rc)
2927                         goto err_wdata;
2928
2929                 if (wdata->cfile->invalidHandle)
2930                         rc = -EAGAIN;
2931                 else
2932                         rc = wdata->server->ops->async_writev(wdata,
2933                                                               cifs_writedata_release);
2934                 if (rc >= 0) {
2935                         kref_put(&wdata->refcount, cifs_writedata_release);
2936                         goto err_close;
2937                 }
2938         } else {
2939                 /* The dirty region was entirely beyond the EOF. */
2940                 cifs_pages_written_back(inode, start, len);
2941                 rc = 0;
2942         }
2943
2944 err_wdata:
2945         kref_put(&wdata->refcount, cifs_writedata_release);
2946 err_uncredit:
2947         add_credits_and_wake_if(server, credits, 0);
2948 err_close:
2949         if (cfile)
2950                 cifsFileInfo_put(cfile);
2951 err_xid:
2952         free_xid(xid);
2953         if (rc == 0) {
2954                 wbc->nr_to_write = count;
2955                 rc = len;
2956         } else if (is_retryable_error(rc)) {
2957                 cifs_pages_write_redirty(inode, start, len);
2958         } else {
2959                 cifs_pages_write_failed(inode, start, len);
2960                 mapping_set_error(mapping, rc);
2961         }
2962         /* Indication to update ctime and mtime as close is deferred */
2963         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2964         return rc;
2965 }
2966
2967 /*
2968  * write a region of pages back to the server
2969  */
2970 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2971                                      struct writeback_control *wbc,
2972                                      struct xa_state *xas,
2973                                      unsigned long long *_start,
2974                                      unsigned long long end)
2975 {
2976         struct folio *folio;
2977         unsigned long long start = *_start;
2978         ssize_t ret;
2979         int skips = 0;
2980
2981 search_again:
2982         /* Find the first dirty page. */
2983         rcu_read_lock();
2984
2985         for (;;) {
2986                 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2987                 if (xas_retry(xas, folio) || xa_is_value(folio))
2988                         continue;
2989                 if (!folio)
2990                         break;
2991
2992                 if (!folio_try_get_rcu(folio)) {
2993                         xas_reset(xas);
2994                         continue;
2995                 }
2996
2997                 if (unlikely(folio != xas_reload(xas))) {
2998                         folio_put(folio);
2999                         xas_reset(xas);
3000                         continue;
3001                 }
3002
3003                 xas_pause(xas);
3004                 break;
3005         }
3006         rcu_read_unlock();
3007         if (!folio)
3008                 return 0;
3009
3010         start = folio_pos(folio); /* May regress with THPs */
3011
3012         /* At this point we hold neither the i_pages lock nor the page lock:
3013          * the page may be truncated or invalidated (changing page->mapping to
3014          * NULL), or even swizzled back from swapper_space to tmpfs file
3015          * mapping
3016          */
3017 lock_again:
3018         if (wbc->sync_mode != WB_SYNC_NONE) {
3019                 ret = folio_lock_killable(folio);
3020                 if (ret < 0)
3021                         return ret;
3022         } else {
3023                 if (!folio_trylock(folio))
3024                         goto search_again;
3025         }
3026
3027         if (folio->mapping != mapping ||
3028             !folio_test_dirty(folio)) {
3029                 start += folio_size(folio);
3030                 folio_unlock(folio);
3031                 goto search_again;
3032         }
3033
3034         if (folio_test_writeback(folio) ||
3035             folio_test_fscache(folio)) {
3036                 folio_unlock(folio);
3037                 if (wbc->sync_mode != WB_SYNC_NONE) {
3038                         folio_wait_writeback(folio);
3039 #ifdef CONFIG_CIFS_FSCACHE
3040                         folio_wait_fscache(folio);
3041 #endif
3042                         goto lock_again;
3043                 }
3044
3045                 start += folio_size(folio);
3046                 if (wbc->sync_mode == WB_SYNC_NONE) {
3047                         if (skips >= 5 || need_resched()) {
3048                                 ret = 0;
3049                                 goto out;
3050                         }
3051                         skips++;
3052                 }
3053                 goto search_again;
3054         }
3055
3056         ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3057 out:
3058         if (ret > 0)
3059                 *_start = start + ret;
3060         return ret;
3061 }
3062
3063 /*
3064  * Write a region of pages back to the server
3065  */
3066 static int cifs_writepages_region(struct address_space *mapping,
3067                                   struct writeback_control *wbc,
3068                                   unsigned long long *_start,
3069                                   unsigned long long end)
3070 {
3071         ssize_t ret;
3072
3073         XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3074
3075         do {
3076                 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3077                 if (ret > 0 && wbc->nr_to_write > 0)
3078                         cond_resched();
3079         } while (ret > 0 && wbc->nr_to_write > 0);
3080
3081         return ret > 0 ? 0 : ret;
3082 }
3083
3084 /*
3085  * Write some of the pending data back to the server
3086  */
3087 static int cifs_writepages(struct address_space *mapping,
3088                            struct writeback_control *wbc)
3089 {
3090         loff_t start, end;
3091         int ret;
3092
3093         /* We have to be careful as we can end up racing with setattr()
3094          * truncating the pagecache since the caller doesn't take a lock here
3095          * to prevent it.
3096          */
3097
3098         if (wbc->range_cyclic && mapping->writeback_index) {
3099                 start = mapping->writeback_index * PAGE_SIZE;
3100                 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3101                 if (ret < 0)
3102                         goto out;
3103
3104                 if (wbc->nr_to_write <= 0) {
3105                         mapping->writeback_index = start / PAGE_SIZE;
3106                         goto out;
3107                 }
3108
3109                 start = 0;
3110                 end = mapping->writeback_index * PAGE_SIZE;
3111                 mapping->writeback_index = 0;
3112                 ret = cifs_writepages_region(mapping, wbc, &start, end);
3113                 if (ret == 0)
3114                         mapping->writeback_index = start / PAGE_SIZE;
3115         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3116                 start = 0;
3117                 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3118                 if (wbc->nr_to_write > 0 && ret == 0)
3119                         mapping->writeback_index = start / PAGE_SIZE;
3120         } else {
3121                 start = wbc->range_start;
3122                 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3123         }
3124
3125 out:
3126         return ret;
3127 }
3128
3129 static int
3130 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3131 {
3132         int rc;
3133         unsigned int xid;
3134
3135         xid = get_xid();
3136 /* BB add check for wbc flags */
3137         get_page(page);
3138         if (!PageUptodate(page))
3139                 cifs_dbg(FYI, "ppw - page not up to date\n");
3140
3141         /*
3142          * Set the "writeback" flag, and clear "dirty" in the radix tree.
3143          *
3144          * A writepage() implementation always needs to do either this,
3145          * or re-dirty the page with "redirty_page_for_writepage()" in
3146          * the case of a failure.
3147          *
3148          * Just unlocking the page will cause the radix tree tag-bits
3149          * to fail to update with the state of the page correctly.
3150          */
3151         set_page_writeback(page);
3152 retry_write:
3153         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3154         if (is_retryable_error(rc)) {
3155                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3156                         goto retry_write;
3157                 redirty_page_for_writepage(wbc, page);
3158         } else if (rc != 0) {
3159                 SetPageError(page);
3160                 mapping_set_error(page->mapping, rc);
3161         } else {
3162                 SetPageUptodate(page);
3163         }
3164         end_page_writeback(page);
3165         put_page(page);
3166         free_xid(xid);
3167         return rc;
3168 }
3169
3170 static int cifs_write_end(struct file *file, struct address_space *mapping,
3171                         loff_t pos, unsigned len, unsigned copied,
3172                         struct page *page, void *fsdata)
3173 {
3174         int rc;
3175         struct inode *inode = mapping->host;
3176         struct cifsFileInfo *cfile = file->private_data;
3177         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3178         struct folio *folio = page_folio(page);
3179         __u32 pid;
3180
3181         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3182                 pid = cfile->pid;
3183         else
3184                 pid = current->tgid;
3185
3186         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3187                  page, pos, copied);
3188
3189         if (folio_test_checked(folio)) {
3190                 if (copied == len)
3191                         folio_mark_uptodate(folio);
3192                 folio_clear_checked(folio);
3193         } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3194                 folio_mark_uptodate(folio);
3195
3196         if (!folio_test_uptodate(folio)) {
3197                 char *page_data;
3198                 unsigned offset = pos & (PAGE_SIZE - 1);
3199                 unsigned int xid;
3200
3201                 xid = get_xid();
3202                 /* this is probably better than directly calling
3203                    partialpage_write since in this function the file handle is
3204                    known which we might as well leverage */
3205                 /* BB check if anything else missing out of ppw
3206                    such as updating last write time */
3207                 page_data = kmap(page);
3208                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3209                 /* if (rc < 0) should we set writebehind rc? */
3210                 kunmap(page);
3211
3212                 free_xid(xid);
3213         } else {
3214                 rc = copied;
3215                 pos += copied;
3216                 set_page_dirty(page);
3217         }
3218
3219         if (rc > 0) {
3220                 spin_lock(&inode->i_lock);
3221                 if (pos > inode->i_size) {
3222                         loff_t additional_blocks = (512 - 1 + copied) >> 9;
3223
3224                         i_size_write(inode, pos);
3225                         /*
3226                          * Estimate new allocation size based on the amount written.
3227                          * This will be updated from server on close (and on queryinfo)
3228                          */
3229                         inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3230                                                 inode->i_blocks + additional_blocks);
3231                 }
3232                 spin_unlock(&inode->i_lock);
3233         }
3234
3235         unlock_page(page);
3236         put_page(page);
3237         /* Indication to update ctime and mtime as close is deferred */
3238         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3239
3240         return rc;
3241 }
3242
3243 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3244                       int datasync)
3245 {
3246         unsigned int xid;
3247         int rc = 0;
3248         struct cifs_tcon *tcon;
3249         struct TCP_Server_Info *server;
3250         struct cifsFileInfo *smbfile = file->private_data;
3251         struct inode *inode = file_inode(file);
3252         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3253
3254         rc = file_write_and_wait_range(file, start, end);
3255         if (rc) {
3256                 trace_cifs_fsync_err(inode->i_ino, rc);
3257                 return rc;
3258         }
3259
3260         xid = get_xid();
3261
3262         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3263                  file, datasync);
3264
3265         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3266                 rc = cifs_zap_mapping(inode);
3267                 if (rc) {
3268                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3269                         rc = 0; /* don't care about it in fsync */
3270                 }
3271         }
3272
3273         tcon = tlink_tcon(smbfile->tlink);
3274         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3275                 server = tcon->ses->server;
3276                 if (server->ops->flush == NULL) {
3277                         rc = -ENOSYS;
3278                         goto strict_fsync_exit;
3279                 }
3280
3281                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3282                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3283                         if (smbfile) {
3284                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3285                                 cifsFileInfo_put(smbfile);
3286                         } else
3287                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3288                 } else
3289                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3290         }
3291
3292 strict_fsync_exit:
3293         free_xid(xid);
3294         return rc;
3295 }
3296
3297 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3298 {
3299         unsigned int xid;
3300         int rc = 0;
3301         struct cifs_tcon *tcon;
3302         struct TCP_Server_Info *server;
3303         struct cifsFileInfo *smbfile = file->private_data;
3304         struct inode *inode = file_inode(file);
3305         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3306
3307         rc = file_write_and_wait_range(file, start, end);
3308         if (rc) {
3309                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3310                 return rc;
3311         }
3312
3313         xid = get_xid();
3314
3315         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3316                  file, datasync);
3317
3318         tcon = tlink_tcon(smbfile->tlink);
3319         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3320                 server = tcon->ses->server;
3321                 if (server->ops->flush == NULL) {
3322                         rc = -ENOSYS;
3323                         goto fsync_exit;
3324                 }
3325
3326                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3327                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3328                         if (smbfile) {
3329                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3330                                 cifsFileInfo_put(smbfile);
3331                         } else
3332                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3333                 } else
3334                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3335         }
3336
3337 fsync_exit:
3338         free_xid(xid);
3339         return rc;
3340 }
3341
3342 /*
3343  * As file closes, flush all cached write data for this inode checking
3344  * for write behind errors.
3345  */
3346 int cifs_flush(struct file *file, fl_owner_t id)
3347 {
3348         struct inode *inode = file_inode(file);
3349         int rc = 0;
3350
3351         if (file->f_mode & FMODE_WRITE)
3352                 rc = filemap_write_and_wait(inode->i_mapping);
3353
3354         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3355         if (rc) {
3356                 /* get more nuanced writeback errors */
3357                 rc = filemap_check_wb_err(file->f_mapping, 0);
3358                 trace_cifs_flush_err(inode->i_ino, rc);
3359         }
3360         return rc;
3361 }
3362
3363 static void
3364 cifs_uncached_writedata_release(struct kref *refcount)
3365 {
3366         struct cifs_writedata *wdata = container_of(refcount,
3367                                         struct cifs_writedata, refcount);
3368
3369         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3370         cifs_writedata_release(refcount);
3371 }
3372
3373 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3374
3375 static void
3376 cifs_uncached_writev_complete(struct work_struct *work)
3377 {
3378         struct cifs_writedata *wdata = container_of(work,
3379                                         struct cifs_writedata, work);
3380         struct inode *inode = d_inode(wdata->cfile->dentry);
3381         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3382
3383         spin_lock(&inode->i_lock);
3384         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3385         if (cifsi->netfs.remote_i_size > inode->i_size)
3386                 i_size_write(inode, cifsi->netfs.remote_i_size);
3387         spin_unlock(&inode->i_lock);
3388
3389         complete(&wdata->done);
3390         collect_uncached_write_data(wdata->ctx);
3391         /* the below call can possibly free the last ref to aio ctx */
3392         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3393 }
3394
3395 static int
3396 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3397         struct cifs_aio_ctx *ctx)
3398 {
3399         unsigned int wsize;
3400         struct cifs_credits credits;
3401         int rc;
3402         struct TCP_Server_Info *server = wdata->server;
3403
3404         do {
3405                 if (wdata->cfile->invalidHandle) {
3406                         rc = cifs_reopen_file(wdata->cfile, false);
3407                         if (rc == -EAGAIN)
3408                                 continue;
3409                         else if (rc)
3410                                 break;
3411                 }
3412
3413
3414                 /*
3415                  * Wait for credits to resend this wdata.
3416                  * Note: we are attempting to resend the whole wdata not in
3417                  * segments
3418                  */
3419                 do {
3420                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3421                                                 &wsize, &credits);
3422                         if (rc)
3423                                 goto fail;
3424
3425                         if (wsize < wdata->bytes) {
3426                                 add_credits_and_wake_if(server, &credits, 0);
3427                                 msleep(1000);
3428                         }
3429                 } while (wsize < wdata->bytes);
3430                 wdata->credits = credits;
3431
3432                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3433
3434                 if (!rc) {
3435                         if (wdata->cfile->invalidHandle)
3436                                 rc = -EAGAIN;
3437                         else {
3438                                 wdata->replay = true;
3439 #ifdef CONFIG_CIFS_SMB_DIRECT
3440                                 if (wdata->mr) {
3441                                         wdata->mr->need_invalidate = true;
3442                                         smbd_deregister_mr(wdata->mr);
3443                                         wdata->mr = NULL;
3444                                 }
3445 #endif
3446                                 rc = server->ops->async_writev(wdata,
3447                                         cifs_uncached_writedata_release);
3448                         }
3449                 }
3450
3451                 /* If the write was successfully sent, we are done */
3452                 if (!rc) {
3453                         list_add_tail(&wdata->list, wdata_list);
3454                         return 0;
3455                 }
3456
3457                 /* Roll back credits and retry if needed */
3458                 add_credits_and_wake_if(server, &wdata->credits, 0);
3459         } while (rc == -EAGAIN);
3460
3461 fail:
3462         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3463         return rc;
3464 }
3465
3466 /*
3467  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3468  * size and maximum number of segments.
3469  */
3470 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3471                                      size_t max_segs, unsigned int *_nsegs)
3472 {
3473         const struct bio_vec *bvecs = iter->bvec;
3474         unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3475         size_t len, span = 0, n = iter->count;
3476         size_t skip = iter->iov_offset;
3477
3478         if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3479                 return 0;
3480
3481         while (n && ix < nbv && skip) {
3482                 len = bvecs[ix].bv_len;
3483                 if (skip < len)
3484                         break;
3485                 skip -= len;
3486                 n -= len;
3487                 ix++;
3488         }
3489
3490         while (n && ix < nbv) {
3491                 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3492                 span += len;
3493                 max_size -= len;
3494                 nsegs++;
3495                 ix++;
3496                 if (max_size == 0 || nsegs >= max_segs)
3497                         break;
3498                 skip = 0;
3499                 n -= len;
3500         }
3501
3502         *_nsegs = nsegs;
3503         return span;
3504 }
3505
3506 static int
3507 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3508                      struct cifsFileInfo *open_file,
3509                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3510                      struct cifs_aio_ctx *ctx)
3511 {
3512         int rc = 0;
3513         size_t cur_len, max_len;
3514         struct cifs_writedata *wdata;
3515         pid_t pid;
3516         struct TCP_Server_Info *server;
3517         unsigned int xid, max_segs = INT_MAX;
3518
3519         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3520                 pid = open_file->pid;
3521         else
3522                 pid = current->tgid;
3523
3524         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3525         xid = get_xid();
3526
3527 #ifdef CONFIG_CIFS_SMB_DIRECT
3528         if (server->smbd_conn)
3529                 max_segs = server->smbd_conn->max_frmr_depth;
3530 #endif
3531
3532         do {
3533                 struct cifs_credits credits_on_stack;
3534                 struct cifs_credits *credits = &credits_on_stack;
3535                 unsigned int wsize, nsegs = 0;
3536
3537                 if (signal_pending(current)) {
3538                         rc = -EINTR;
3539                         break;
3540                 }
3541
3542                 if (open_file->invalidHandle) {
3543                         rc = cifs_reopen_file(open_file, false);
3544                         if (rc == -EAGAIN)
3545                                 continue;
3546                         else if (rc)
3547                                 break;
3548                 }
3549
3550                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3551                                                    &wsize, credits);
3552                 if (rc)
3553                         break;
3554
3555                 max_len = min_t(const size_t, len, wsize);
3556                 if (!max_len) {
3557                         rc = -EAGAIN;
3558                         add_credits_and_wake_if(server, credits, 0);
3559                         break;
3560                 }
3561
3562                 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3563                 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3564                          cur_len, max_len, nsegs, from->nr_segs, max_segs);
3565                 if (cur_len == 0) {
3566                         rc = -EIO;
3567                         add_credits_and_wake_if(server, credits, 0);
3568                         break;
3569                 }
3570
3571                 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3572                 if (!wdata) {
3573                         rc = -ENOMEM;
3574                         add_credits_and_wake_if(server, credits, 0);
3575                         break;
3576                 }
3577
3578                 wdata->sync_mode = WB_SYNC_ALL;
3579                 wdata->offset   = (__u64)fpos;
3580                 wdata->cfile    = cifsFileInfo_get(open_file);
3581                 wdata->server   = server;
3582                 wdata->pid      = pid;
3583                 wdata->bytes    = cur_len;
3584                 wdata->credits  = credits_on_stack;
3585                 wdata->iter     = *from;
3586                 wdata->ctx      = ctx;
3587                 kref_get(&ctx->refcount);
3588
3589                 iov_iter_truncate(&wdata->iter, cur_len);
3590
3591                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3592
3593                 if (!rc) {
3594                         if (wdata->cfile->invalidHandle)
3595                                 rc = -EAGAIN;
3596                         else
3597                                 rc = server->ops->async_writev(wdata,
3598                                         cifs_uncached_writedata_release);
3599                 }
3600
3601                 if (rc) {
3602                         add_credits_and_wake_if(server, &wdata->credits, 0);
3603                         kref_put(&wdata->refcount,
3604                                  cifs_uncached_writedata_release);
3605                         if (rc == -EAGAIN)
3606                                 continue;
3607                         break;
3608                 }
3609
3610                 list_add_tail(&wdata->list, wdata_list);
3611                 iov_iter_advance(from, cur_len);
3612                 fpos += cur_len;
3613                 len -= cur_len;
3614         } while (len > 0);
3615
3616         free_xid(xid);
3617         return rc;
3618 }
3619
3620 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3621 {
3622         struct cifs_writedata *wdata, *tmp;
3623         struct cifs_tcon *tcon;
3624         struct cifs_sb_info *cifs_sb;
3625         struct dentry *dentry = ctx->cfile->dentry;
3626         ssize_t rc;
3627
3628         tcon = tlink_tcon(ctx->cfile->tlink);
3629         cifs_sb = CIFS_SB(dentry->d_sb);
3630
3631         mutex_lock(&ctx->aio_mutex);
3632
3633         if (list_empty(&ctx->list)) {
3634                 mutex_unlock(&ctx->aio_mutex);
3635                 return;
3636         }
3637
3638         rc = ctx->rc;
3639         /*
3640          * Wait for and collect replies for any successful sends in order of
3641          * increasing offset. Once an error is hit, then return without waiting
3642          * for any more replies.
3643          */
3644 restart_loop:
3645         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3646                 if (!rc) {
3647                         if (!try_wait_for_completion(&wdata->done)) {
3648                                 mutex_unlock(&ctx->aio_mutex);
3649                                 return;
3650                         }
3651
3652                         if (wdata->result)
3653                                 rc = wdata->result;
3654                         else
3655                                 ctx->total_len += wdata->bytes;
3656
3657                         /* resend call if it's a retryable error */
3658                         if (rc == -EAGAIN) {
3659                                 struct list_head tmp_list;
3660                                 struct iov_iter tmp_from = ctx->iter;
3661
3662                                 INIT_LIST_HEAD(&tmp_list);
3663                                 list_del_init(&wdata->list);
3664
3665                                 if (ctx->direct_io)
3666                                         rc = cifs_resend_wdata(
3667                                                 wdata, &tmp_list, ctx);
3668                                 else {
3669                                         iov_iter_advance(&tmp_from,
3670                                                  wdata->offset - ctx->pos);
3671
3672                                         rc = cifs_write_from_iter(wdata->offset,
3673                                                 wdata->bytes, &tmp_from,
3674                                                 ctx->cfile, cifs_sb, &tmp_list,
3675                                                 ctx);
3676
3677                                         kref_put(&wdata->refcount,
3678                                                 cifs_uncached_writedata_release);
3679                                 }
3680
3681                                 list_splice(&tmp_list, &ctx->list);
3682                                 goto restart_loop;
3683                         }
3684                 }
3685                 list_del_init(&wdata->list);
3686                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3687         }
3688
3689         cifs_stats_bytes_written(tcon, ctx->total_len);
3690         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3691
3692         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3693
3694         mutex_unlock(&ctx->aio_mutex);
3695
3696         if (ctx->iocb && ctx->iocb->ki_complete)
3697                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3698         else
3699                 complete(&ctx->done);
3700 }
3701
3702 static ssize_t __cifs_writev(
3703         struct kiocb *iocb, struct iov_iter *from, bool direct)
3704 {
3705         struct file *file = iocb->ki_filp;
3706         ssize_t total_written = 0;
3707         struct cifsFileInfo *cfile;
3708         struct cifs_tcon *tcon;
3709         struct cifs_sb_info *cifs_sb;
3710         struct cifs_aio_ctx *ctx;
3711         int rc;
3712
3713         rc = generic_write_checks(iocb, from);
3714         if (rc <= 0)
3715                 return rc;
3716
3717         cifs_sb = CIFS_FILE_SB(file);
3718         cfile = file->private_data;
3719         tcon = tlink_tcon(cfile->tlink);
3720
3721         if (!tcon->ses->server->ops->async_writev)
3722                 return -ENOSYS;
3723
3724         ctx = cifs_aio_ctx_alloc();
3725         if (!ctx)
3726                 return -ENOMEM;
3727
3728         ctx->cfile = cifsFileInfo_get(cfile);
3729
3730         if (!is_sync_kiocb(iocb))
3731                 ctx->iocb = iocb;
3732
3733         ctx->pos = iocb->ki_pos;
3734         ctx->direct_io = direct;
3735         ctx->nr_pinned_pages = 0;
3736
3737         if (user_backed_iter(from)) {
3738                 /*
3739                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3740                  * they contain references to the calling process's virtual
3741                  * memory layout which won't be available in an async worker
3742                  * thread.  This also takes a pin on every folio involved.
3743                  */
3744                 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3745                                              &ctx->iter, 0);
3746                 if (rc < 0) {
3747                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3748                         return rc;
3749                 }
3750
3751                 ctx->nr_pinned_pages = rc;
3752                 ctx->bv = (void *)ctx->iter.bvec;
3753                 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3754         } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3755                    !is_sync_kiocb(iocb)) {
3756                 /*
3757                  * If the op is asynchronous, we need to copy the list attached
3758                  * to a BVEC/KVEC-type iterator, but we assume that the storage
3759                  * will be pinned by the caller; in any case, we may or may not
3760                  * be able to pin the pages, so we don't try.
3761                  */
3762                 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3763                 if (!ctx->bv) {
3764                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3765                         return -ENOMEM;
3766                 }
3767         } else {
3768                 /*
3769                  * Otherwise, we just pass the iterator down as-is and rely on
3770                  * the caller to make sure the pages referred to by the
3771                  * iterator don't evaporate.
3772                  */
3773                 ctx->iter = *from;
3774         }
3775
3776         ctx->len = iov_iter_count(&ctx->iter);
3777
3778         /* grab a lock here due to read response handlers can access ctx */
3779         mutex_lock(&ctx->aio_mutex);
3780
3781         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3782                                   cfile, cifs_sb, &ctx->list, ctx);
3783
3784         /*
3785          * If at least one write was successfully sent, then discard any rc
3786          * value from the later writes. If the other write succeeds, then
3787          * we'll end up returning whatever was written. If it fails, then
3788          * we'll get a new rc value from that.
3789          */
3790         if (!list_empty(&ctx->list))
3791                 rc = 0;
3792
3793         mutex_unlock(&ctx->aio_mutex);
3794
3795         if (rc) {
3796                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3797                 return rc;
3798         }
3799
3800         if (!is_sync_kiocb(iocb)) {
3801                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3802                 return -EIOCBQUEUED;
3803         }
3804
3805         rc = wait_for_completion_killable(&ctx->done);
3806         if (rc) {
3807                 mutex_lock(&ctx->aio_mutex);
3808                 ctx->rc = rc = -EINTR;
3809                 total_written = ctx->total_len;
3810                 mutex_unlock(&ctx->aio_mutex);
3811         } else {
3812                 rc = ctx->rc;
3813                 total_written = ctx->total_len;
3814         }
3815
3816         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3817
3818         if (unlikely(!total_written))
3819                 return rc;
3820
3821         iocb->ki_pos += total_written;
3822         return total_written;
3823 }
3824
3825 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3826 {
3827         struct file *file = iocb->ki_filp;
3828
3829         cifs_revalidate_mapping(file->f_inode);
3830         return __cifs_writev(iocb, from, true);
3831 }
3832
3833 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3834 {
3835         return __cifs_writev(iocb, from, false);
3836 }
3837
3838 static ssize_t
3839 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3840 {
3841         struct file *file = iocb->ki_filp;
3842         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3843         struct inode *inode = file->f_mapping->host;
3844         struct cifsInodeInfo *cinode = CIFS_I(inode);
3845         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3846         ssize_t rc;
3847
3848         inode_lock(inode);
3849         /*
3850          * We need to hold the sem to be sure nobody modifies lock list
3851          * with a brlock that prevents writing.
3852          */
3853         down_read(&cinode->lock_sem);
3854
3855         rc = generic_write_checks(iocb, from);
3856         if (rc <= 0)
3857                 goto out;
3858
3859         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3860                                      server->vals->exclusive_lock_type, 0,
3861                                      NULL, CIFS_WRITE_OP))
3862                 rc = __generic_file_write_iter(iocb, from);
3863         else
3864                 rc = -EACCES;
3865 out:
3866         up_read(&cinode->lock_sem);
3867         inode_unlock(inode);
3868
3869         if (rc > 0)
3870                 rc = generic_write_sync(iocb, rc);
3871         return rc;
3872 }
3873
3874 ssize_t
3875 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3876 {
3877         struct inode *inode = file_inode(iocb->ki_filp);
3878         struct cifsInodeInfo *cinode = CIFS_I(inode);
3879         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3880         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3881                                                 iocb->ki_filp->private_data;
3882         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3883         ssize_t written;
3884
3885         written = cifs_get_writer(cinode);
3886         if (written)
3887                 return written;
3888
3889         if (CIFS_CACHE_WRITE(cinode)) {
3890                 if (cap_unix(tcon->ses) &&
3891                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3892                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3893                         written = generic_file_write_iter(iocb, from);
3894                         goto out;
3895                 }
3896                 written = cifs_writev(iocb, from);
3897                 goto out;
3898         }
3899         /*
3900          * For non-oplocked files in strict cache mode we need to write the data
3901          * to the server exactly from the pos to pos+len-1 rather than flush all
3902          * affected pages because it may cause a error with mandatory locks on
3903          * these pages but not on the region from pos to ppos+len-1.
3904          */
3905         written = cifs_user_writev(iocb, from);
3906         if (CIFS_CACHE_READ(cinode)) {
3907                 /*
3908                  * We have read level caching and we have just sent a write
3909                  * request to the server thus making data in the cache stale.
3910                  * Zap the cache and set oplock/lease level to NONE to avoid
3911                  * reading stale data from the cache. All subsequent read
3912                  * operations will read new data from the server.
3913                  */
3914                 cifs_zap_mapping(inode);
3915                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3916                          inode);
3917                 cinode->oplock = 0;
3918         }
3919 out:
3920         cifs_put_writer(cinode);
3921         return written;
3922 }
3923
3924 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3925 {
3926         struct cifs_readdata *rdata;
3927
3928         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3929         if (rdata) {
3930                 kref_init(&rdata->refcount);
3931                 INIT_LIST_HEAD(&rdata->list);
3932                 init_completion(&rdata->done);
3933                 INIT_WORK(&rdata->work, complete);
3934         }
3935
3936         return rdata;
3937 }
3938
3939 void
3940 cifs_readdata_release(struct kref *refcount)
3941 {
3942         struct cifs_readdata *rdata = container_of(refcount,
3943                                         struct cifs_readdata, refcount);
3944
3945         if (rdata->ctx)
3946                 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3947 #ifdef CONFIG_CIFS_SMB_DIRECT
3948         if (rdata->mr) {
3949                 smbd_deregister_mr(rdata->mr);
3950                 rdata->mr = NULL;
3951         }
3952 #endif
3953         if (rdata->cfile)
3954                 cifsFileInfo_put(rdata->cfile);
3955
3956         kfree(rdata);
3957 }
3958
3959 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3960
3961 static void
3962 cifs_uncached_readv_complete(struct work_struct *work)
3963 {
3964         struct cifs_readdata *rdata = container_of(work,
3965                                                 struct cifs_readdata, work);
3966
3967         complete(&rdata->done);
3968         collect_uncached_read_data(rdata->ctx);
3969         /* the below call can possibly free the last ref to aio ctx */
3970         kref_put(&rdata->refcount, cifs_readdata_release);
3971 }
3972
3973 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3974                         struct list_head *rdata_list,
3975                         struct cifs_aio_ctx *ctx)
3976 {
3977         unsigned int rsize;
3978         struct cifs_credits credits;
3979         int rc;
3980         struct TCP_Server_Info *server;
3981
3982         /* XXX: should we pick a new channel here? */
3983         server = rdata->server;
3984
3985         do {
3986                 if (rdata->cfile->invalidHandle) {
3987                         rc = cifs_reopen_file(rdata->cfile, true);
3988                         if (rc == -EAGAIN)
3989                                 continue;
3990                         else if (rc)
3991                                 break;
3992                 }
3993
3994                 /*
3995                  * Wait for credits to resend this rdata.
3996                  * Note: we are attempting to resend the whole rdata not in
3997                  * segments
3998                  */
3999                 do {
4000                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
4001                                                 &rsize, &credits);
4002
4003                         if (rc)
4004                                 goto fail;
4005
4006                         if (rsize < rdata->bytes) {
4007                                 add_credits_and_wake_if(server, &credits, 0);
4008                                 msleep(1000);
4009                         }
4010                 } while (rsize < rdata->bytes);
4011                 rdata->credits = credits;
4012
4013                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4014                 if (!rc) {
4015                         if (rdata->cfile->invalidHandle)
4016                                 rc = -EAGAIN;
4017                         else {
4018 #ifdef CONFIG_CIFS_SMB_DIRECT
4019                                 if (rdata->mr) {
4020                                         rdata->mr->need_invalidate = true;
4021                                         smbd_deregister_mr(rdata->mr);
4022                                         rdata->mr = NULL;
4023                                 }
4024 #endif
4025                                 rc = server->ops->async_readv(rdata);
4026                         }
4027                 }
4028
4029                 /* If the read was successfully sent, we are done */
4030                 if (!rc) {
4031                         /* Add to aio pending list */
4032                         list_add_tail(&rdata->list, rdata_list);
4033                         return 0;
4034                 }
4035
4036                 /* Roll back credits and retry if needed */
4037                 add_credits_and_wake_if(server, &rdata->credits, 0);
4038         } while (rc == -EAGAIN);
4039
4040 fail:
4041         kref_put(&rdata->refcount, cifs_readdata_release);
4042         return rc;
4043 }
4044
4045 static int
4046 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4047                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4048                      struct cifs_aio_ctx *ctx)
4049 {
4050         struct cifs_readdata *rdata;
4051         unsigned int rsize, nsegs, max_segs = INT_MAX;
4052         struct cifs_credits credits_on_stack;
4053         struct cifs_credits *credits = &credits_on_stack;
4054         size_t cur_len, max_len;
4055         int rc;
4056         pid_t pid;
4057         struct TCP_Server_Info *server;
4058
4059         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4060
4061 #ifdef CONFIG_CIFS_SMB_DIRECT
4062         if (server->smbd_conn)
4063                 max_segs = server->smbd_conn->max_frmr_depth;
4064 #endif
4065
4066         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067                 pid = open_file->pid;
4068         else
4069                 pid = current->tgid;
4070
4071         do {
4072                 if (open_file->invalidHandle) {
4073                         rc = cifs_reopen_file(open_file, true);
4074                         if (rc == -EAGAIN)
4075                                 continue;
4076                         else if (rc)
4077                                 break;
4078                 }
4079
4080                 if (cifs_sb->ctx->rsize == 0)
4081                         cifs_sb->ctx->rsize =
4082                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4083                                                              cifs_sb->ctx);
4084
4085                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4086                                                    &rsize, credits);
4087                 if (rc)
4088                         break;
4089
4090                 max_len = min_t(size_t, len, rsize);
4091
4092                 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4093                                                  max_segs, &nsegs);
4094                 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4095                          cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4096                 if (cur_len == 0) {
4097                         rc = -EIO;
4098                         add_credits_and_wake_if(server, credits, 0);
4099                         break;
4100                 }
4101
4102                 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4103                 if (!rdata) {
4104                         add_credits_and_wake_if(server, credits, 0);
4105                         rc = -ENOMEM;
4106                         break;
4107                 }
4108
4109                 rdata->server   = server;
4110                 rdata->cfile    = cifsFileInfo_get(open_file);
4111                 rdata->offset   = fpos;
4112                 rdata->bytes    = cur_len;
4113                 rdata->pid      = pid;
4114                 rdata->credits  = credits_on_stack;
4115                 rdata->ctx      = ctx;
4116                 kref_get(&ctx->refcount);
4117
4118                 rdata->iter     = ctx->iter;
4119                 iov_iter_truncate(&rdata->iter, cur_len);
4120
4121                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4122
4123                 if (!rc) {
4124                         if (rdata->cfile->invalidHandle)
4125                                 rc = -EAGAIN;
4126                         else
4127                                 rc = server->ops->async_readv(rdata);
4128                 }
4129
4130                 if (rc) {
4131                         add_credits_and_wake_if(server, &rdata->credits, 0);
4132                         kref_put(&rdata->refcount, cifs_readdata_release);
4133                         if (rc == -EAGAIN)
4134                                 continue;
4135                         break;
4136                 }
4137
4138                 list_add_tail(&rdata->list, rdata_list);
4139                 iov_iter_advance(&ctx->iter, cur_len);
4140                 fpos += cur_len;
4141                 len -= cur_len;
4142         } while (len > 0);
4143
4144         return rc;
4145 }
4146
4147 static void
4148 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4149 {
4150         struct cifs_readdata *rdata, *tmp;
4151         struct cifs_sb_info *cifs_sb;
4152         int rc;
4153
4154         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4155
4156         mutex_lock(&ctx->aio_mutex);
4157
4158         if (list_empty(&ctx->list)) {
4159                 mutex_unlock(&ctx->aio_mutex);
4160                 return;
4161         }
4162
4163         rc = ctx->rc;
4164         /* the loop below should proceed in the order of increasing offsets */
4165 again:
4166         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4167                 if (!rc) {
4168                         if (!try_wait_for_completion(&rdata->done)) {
4169                                 mutex_unlock(&ctx->aio_mutex);
4170                                 return;
4171                         }
4172
4173                         if (rdata->result == -EAGAIN) {
4174                                 /* resend call if it's a retryable error */
4175                                 struct list_head tmp_list;
4176                                 unsigned int got_bytes = rdata->got_bytes;
4177
4178                                 list_del_init(&rdata->list);
4179                                 INIT_LIST_HEAD(&tmp_list);
4180
4181                                 if (ctx->direct_io) {
4182                                         /*
4183                                          * Re-use rdata as this is a
4184                                          * direct I/O
4185                                          */
4186                                         rc = cifs_resend_rdata(
4187                                                 rdata,
4188                                                 &tmp_list, ctx);
4189                                 } else {
4190                                         rc = cifs_send_async_read(
4191                                                 rdata->offset + got_bytes,
4192                                                 rdata->bytes - got_bytes,
4193                                                 rdata->cfile, cifs_sb,
4194                                                 &tmp_list, ctx);
4195
4196                                         kref_put(&rdata->refcount,
4197                                                 cifs_readdata_release);
4198                                 }
4199
4200                                 list_splice(&tmp_list, &ctx->list);
4201
4202                                 goto again;
4203                         } else if (rdata->result)
4204                                 rc = rdata->result;
4205
4206                         /* if there was a short read -- discard anything left */
4207                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4208                                 rc = -ENODATA;
4209
4210                         ctx->total_len += rdata->got_bytes;
4211                 }
4212                 list_del_init(&rdata->list);
4213                 kref_put(&rdata->refcount, cifs_readdata_release);
4214         }
4215
4216         /* mask nodata case */
4217         if (rc == -ENODATA)
4218                 rc = 0;
4219
4220         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4221
4222         mutex_unlock(&ctx->aio_mutex);
4223
4224         if (ctx->iocb && ctx->iocb->ki_complete)
4225                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4226         else
4227                 complete(&ctx->done);
4228 }
4229
4230 static ssize_t __cifs_readv(
4231         struct kiocb *iocb, struct iov_iter *to, bool direct)
4232 {
4233         size_t len;
4234         struct file *file = iocb->ki_filp;
4235         struct cifs_sb_info *cifs_sb;
4236         struct cifsFileInfo *cfile;
4237         struct cifs_tcon *tcon;
4238         ssize_t rc, total_read = 0;
4239         loff_t offset = iocb->ki_pos;
4240         struct cifs_aio_ctx *ctx;
4241
4242         len = iov_iter_count(to);
4243         if (!len)
4244                 return 0;
4245
4246         cifs_sb = CIFS_FILE_SB(file);
4247         cfile = file->private_data;
4248         tcon = tlink_tcon(cfile->tlink);
4249
4250         if (!tcon->ses->server->ops->async_readv)
4251                 return -ENOSYS;
4252
4253         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4254                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4255
4256         ctx = cifs_aio_ctx_alloc();
4257         if (!ctx)
4258                 return -ENOMEM;
4259
4260         ctx->pos        = offset;
4261         ctx->direct_io  = direct;
4262         ctx->len        = len;
4263         ctx->cfile      = cifsFileInfo_get(cfile);
4264         ctx->nr_pinned_pages = 0;
4265
4266         if (!is_sync_kiocb(iocb))
4267                 ctx->iocb = iocb;
4268
4269         if (user_backed_iter(to)) {
4270                 /*
4271                  * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4272                  * they contain references to the calling process's virtual
4273                  * memory layout which won't be available in an async worker
4274                  * thread.  This also takes a pin on every folio involved.
4275                  */
4276                 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4277                                              &ctx->iter, 0);
4278                 if (rc < 0) {
4279                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4280                         return rc;
4281                 }
4282
4283                 ctx->nr_pinned_pages = rc;
4284                 ctx->bv = (void *)ctx->iter.bvec;
4285                 ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4286                 ctx->should_dirty = true;
4287         } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4288                    !is_sync_kiocb(iocb)) {
4289                 /*
4290                  * If the op is asynchronous, we need to copy the list attached
4291                  * to a BVEC/KVEC-type iterator, but we assume that the storage
4292                  * will be retained by the caller; in any case, we may or may
4293                  * not be able to pin the pages, so we don't try.
4294                  */
4295                 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4296                 if (!ctx->bv) {
4297                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4298                         return -ENOMEM;
4299                 }
4300         } else {
4301                 /*
4302                  * Otherwise, we just pass the iterator down as-is and rely on
4303                  * the caller to make sure the pages referred to by the
4304                  * iterator don't evaporate.
4305                  */
4306                 ctx->iter = *to;
4307         }
4308
4309         if (direct) {
4310                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4311                                                   offset, offset + len - 1);
4312                 if (rc) {
4313                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314                         return -EAGAIN;
4315                 }
4316         }
4317
4318         /* grab a lock here due to read response handlers can access ctx */
4319         mutex_lock(&ctx->aio_mutex);
4320
4321         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4322
4323         /* if at least one read request send succeeded, then reset rc */
4324         if (!list_empty(&ctx->list))
4325                 rc = 0;
4326
4327         mutex_unlock(&ctx->aio_mutex);
4328
4329         if (rc) {
4330                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4331                 return rc;
4332         }
4333
4334         if (!is_sync_kiocb(iocb)) {
4335                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336                 return -EIOCBQUEUED;
4337         }
4338
4339         rc = wait_for_completion_killable(&ctx->done);
4340         if (rc) {
4341                 mutex_lock(&ctx->aio_mutex);
4342                 ctx->rc = rc = -EINTR;
4343                 total_read = ctx->total_len;
4344                 mutex_unlock(&ctx->aio_mutex);
4345         } else {
4346                 rc = ctx->rc;
4347                 total_read = ctx->total_len;
4348         }
4349
4350         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4351
4352         if (total_read) {
4353                 iocb->ki_pos += total_read;
4354                 return total_read;
4355         }
4356         return rc;
4357 }
4358
4359 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4360 {
4361         return __cifs_readv(iocb, to, true);
4362 }
4363
4364 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4365 {
4366         return __cifs_readv(iocb, to, false);
4367 }
4368
4369 ssize_t
4370 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4371 {
4372         struct inode *inode = file_inode(iocb->ki_filp);
4373         struct cifsInodeInfo *cinode = CIFS_I(inode);
4374         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4375         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4376                                                 iocb->ki_filp->private_data;
4377         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4378         int rc = -EACCES;
4379
4380         /*
4381          * In strict cache mode we need to read from the server all the time
4382          * if we don't have level II oplock because the server can delay mtime
4383          * change - so we can't make a decision about inode invalidating.
4384          * And we can also fail with pagereading if there are mandatory locks
4385          * on pages affected by this read but not on the region from pos to
4386          * pos+len-1.
4387          */
4388         if (!CIFS_CACHE_READ(cinode))
4389                 return cifs_user_readv(iocb, to);
4390
4391         if (cap_unix(tcon->ses) &&
4392             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4393             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4394                 return generic_file_read_iter(iocb, to);
4395
4396         /*
4397          * We need to hold the sem to be sure nobody modifies lock list
4398          * with a brlock that prevents reading.
4399          */
4400         down_read(&cinode->lock_sem);
4401         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4402                                      tcon->ses->server->vals->shared_lock_type,
4403                                      0, NULL, CIFS_READ_OP))
4404                 rc = generic_file_read_iter(iocb, to);
4405         up_read(&cinode->lock_sem);
4406         return rc;
4407 }
4408
4409 static ssize_t
4410 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4411 {
4412         int rc = -EACCES;
4413         unsigned int bytes_read = 0;
4414         unsigned int total_read;
4415         unsigned int current_read_size;
4416         unsigned int rsize;
4417         struct cifs_sb_info *cifs_sb;
4418         struct cifs_tcon *tcon;
4419         struct TCP_Server_Info *server;
4420         unsigned int xid;
4421         char *cur_offset;
4422         struct cifsFileInfo *open_file;
4423         struct cifs_io_parms io_parms = {0};
4424         int buf_type = CIFS_NO_BUFFER;
4425         __u32 pid;
4426
4427         xid = get_xid();
4428         cifs_sb = CIFS_FILE_SB(file);
4429
4430         /* FIXME: set up handlers for larger reads and/or convert to async */
4431         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4432
4433         if (file->private_data == NULL) {
4434                 rc = -EBADF;
4435                 free_xid(xid);
4436                 return rc;
4437         }
4438         open_file = file->private_data;
4439         tcon = tlink_tcon(open_file->tlink);
4440         server = cifs_pick_channel(tcon->ses);
4441
4442         if (!server->ops->sync_read) {
4443                 free_xid(xid);
4444                 return -ENOSYS;
4445         }
4446
4447         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4448                 pid = open_file->pid;
4449         else
4450                 pid = current->tgid;
4451
4452         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4453                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4454
4455         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4456              total_read += bytes_read, cur_offset += bytes_read) {
4457                 do {
4458                         current_read_size = min_t(uint, read_size - total_read,
4459                                                   rsize);
4460                         /*
4461                          * For windows me and 9x we do not want to request more
4462                          * than it negotiated since it will refuse the read
4463                          * then.
4464                          */
4465                         if (!(tcon->ses->capabilities &
4466                                 tcon->ses->server->vals->cap_large_files)) {
4467                                 current_read_size = min_t(uint,
4468                                         current_read_size, CIFSMaxBufSize);
4469                         }
4470                         if (open_file->invalidHandle) {
4471                                 rc = cifs_reopen_file(open_file, true);
4472                                 if (rc != 0)
4473                                         break;
4474                         }
4475                         io_parms.pid = pid;
4476                         io_parms.tcon = tcon;
4477                         io_parms.offset = *offset;
4478                         io_parms.length = current_read_size;
4479                         io_parms.server = server;
4480                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4481                                                     &bytes_read, &cur_offset,
4482                                                     &buf_type);
4483                 } while (rc == -EAGAIN);
4484
4485                 if (rc || (bytes_read == 0)) {
4486                         if (total_read) {
4487                                 break;
4488                         } else {
4489                                 free_xid(xid);
4490                                 return rc;
4491                         }
4492                 } else {
4493                         cifs_stats_bytes_read(tcon, total_read);
4494                         *offset += bytes_read;
4495                 }
4496         }
4497         free_xid(xid);
4498         return total_read;
4499 }
4500
4501 /*
4502  * If the page is mmap'ed into a process' page tables, then we need to make
4503  * sure that it doesn't change while being written back.
4504  */
4505 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4506 {
4507         struct folio *folio = page_folio(vmf->page);
4508
4509         /* Wait for the folio to be written to the cache before we allow it to
4510          * be modified.  We then assume the entire folio will need writing back.
4511          */
4512 #ifdef CONFIG_CIFS_FSCACHE
4513         if (folio_test_fscache(folio) &&
4514             folio_wait_fscache_killable(folio) < 0)
4515                 return VM_FAULT_RETRY;
4516 #endif
4517
4518         folio_wait_writeback(folio);
4519
4520         if (folio_lock_killable(folio) < 0)
4521                 return VM_FAULT_RETRY;
4522         return VM_FAULT_LOCKED;
4523 }
4524
4525 static const struct vm_operations_struct cifs_file_vm_ops = {
4526         .fault = filemap_fault,
4527         .map_pages = filemap_map_pages,
4528         .page_mkwrite = cifs_page_mkwrite,
4529 };
4530
4531 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4532 {
4533         int xid, rc = 0;
4534         struct inode *inode = file_inode(file);
4535
4536         xid = get_xid();
4537
4538         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4539                 rc = cifs_zap_mapping(inode);
4540         if (!rc)
4541                 rc = generic_file_mmap(file, vma);
4542         if (!rc)
4543                 vma->vm_ops = &cifs_file_vm_ops;
4544
4545         free_xid(xid);
4546         return rc;
4547 }
4548
4549 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4550 {
4551         int rc, xid;
4552
4553         xid = get_xid();
4554
4555         rc = cifs_revalidate_file(file);
4556         if (rc)
4557                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4558                          rc);
4559         if (!rc)
4560                 rc = generic_file_mmap(file, vma);
4561         if (!rc)
4562                 vma->vm_ops = &cifs_file_vm_ops;
4563
4564         free_xid(xid);
4565         return rc;
4566 }
4567
4568 /*
4569  * Unlock a bunch of folios in the pagecache.
4570  */
4571 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4572 {
4573         struct folio *folio;
4574         XA_STATE(xas, &mapping->i_pages, first);
4575
4576         rcu_read_lock();
4577         xas_for_each(&xas, folio, last) {
4578                 folio_unlock(folio);
4579         }
4580         rcu_read_unlock();
4581 }
4582
4583 static void cifs_readahead_complete(struct work_struct *work)
4584 {
4585         struct cifs_readdata *rdata = container_of(work,
4586                                                    struct cifs_readdata, work);
4587         struct folio *folio;
4588         pgoff_t last;
4589         bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4590
4591         XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4592
4593         if (good)
4594                 cifs_readahead_to_fscache(rdata->mapping->host,
4595                                           rdata->offset, rdata->bytes);
4596
4597         if (iov_iter_count(&rdata->iter) > 0)
4598                 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4599
4600         last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4601
4602         rcu_read_lock();
4603         xas_for_each(&xas, folio, last) {
4604                 if (good) {
4605                         flush_dcache_folio(folio);
4606                         folio_mark_uptodate(folio);
4607                 }
4608                 folio_unlock(folio);
4609         }
4610         rcu_read_unlock();
4611
4612         kref_put(&rdata->refcount, cifs_readdata_release);
4613 }
4614
4615 static void cifs_readahead(struct readahead_control *ractl)
4616 {
4617         struct cifsFileInfo *open_file = ractl->file->private_data;
4618         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4619         struct TCP_Server_Info *server;
4620         unsigned int xid, nr_pages, cache_nr_pages = 0;
4621         unsigned int ra_pages;
4622         pgoff_t next_cached = ULONG_MAX, ra_index;
4623         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4624                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4625         bool check_cache = caching;
4626         pid_t pid;
4627         int rc = 0;
4628
4629         /* Note that readahead_count() lags behind our dequeuing of pages from
4630          * the ractl, wo we have to keep track for ourselves.
4631          */
4632         ra_pages = readahead_count(ractl);
4633         ra_index = readahead_index(ractl);
4634
4635         xid = get_xid();
4636
4637         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4638                 pid = open_file->pid;
4639         else
4640                 pid = current->tgid;
4641
4642         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4643
4644         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4645                  __func__, ractl->file, ractl->mapping, ra_pages);
4646
4647         /*
4648          * Chop the readahead request up into rsize-sized read requests.
4649          */
4650         while ((nr_pages = ra_pages)) {
4651                 unsigned int i, rsize;
4652                 struct cifs_readdata *rdata;
4653                 struct cifs_credits credits_on_stack;
4654                 struct cifs_credits *credits = &credits_on_stack;
4655                 struct folio *folio;
4656                 pgoff_t fsize;
4657
4658                 /*
4659                  * Find out if we have anything cached in the range of
4660                  * interest, and if so, where the next chunk of cached data is.
4661                  */
4662                 if (caching) {
4663                         if (check_cache) {
4664                                 rc = cifs_fscache_query_occupancy(
4665                                         ractl->mapping->host, ra_index, nr_pages,
4666                                         &next_cached, &cache_nr_pages);
4667                                 if (rc < 0)
4668                                         caching = false;
4669                                 check_cache = false;
4670                         }
4671
4672                         if (ra_index == next_cached) {
4673                                 /*
4674                                  * TODO: Send a whole batch of pages to be read
4675                                  * by the cache.
4676                                  */
4677                                 folio = readahead_folio(ractl);
4678                                 fsize = folio_nr_pages(folio);
4679                                 ra_pages -= fsize;
4680                                 ra_index += fsize;
4681                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4682                                                                &folio->page) < 0) {
4683                                         /*
4684                                          * TODO: Deal with cache read failure
4685                                          * here, but for the moment, delegate
4686                                          * that to readpage.
4687                                          */
4688                                         caching = false;
4689                                 }
4690                                 folio_unlock(folio);
4691                                 next_cached += fsize;
4692                                 cache_nr_pages -= fsize;
4693                                 if (cache_nr_pages == 0)
4694                                         check_cache = true;
4695                                 continue;
4696                         }
4697                 }
4698
4699                 if (open_file->invalidHandle) {
4700                         rc = cifs_reopen_file(open_file, true);
4701                         if (rc) {
4702                                 if (rc == -EAGAIN)
4703                                         continue;
4704                                 break;
4705                         }
4706                 }
4707
4708                 if (cifs_sb->ctx->rsize == 0)
4709                         cifs_sb->ctx->rsize =
4710                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4711                                                              cifs_sb->ctx);
4712
4713                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4714                                                    &rsize, credits);
4715                 if (rc)
4716                         break;
4717                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4718                 if (next_cached != ULONG_MAX)
4719                         nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4720
4721                 /*
4722                  * Give up immediately if rsize is too small to read an entire
4723                  * page. The VFS will fall back to readpage. We should never
4724                  * reach this point however since we set ra_pages to 0 when the
4725                  * rsize is smaller than a cache page.
4726                  */
4727                 if (unlikely(!nr_pages)) {
4728                         add_credits_and_wake_if(server, credits, 0);
4729                         break;
4730                 }
4731
4732                 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4733                 if (!rdata) {
4734                         /* best to give up if we're out of mem */
4735                         add_credits_and_wake_if(server, credits, 0);
4736                         break;
4737                 }
4738
4739                 rdata->offset   = ra_index * PAGE_SIZE;
4740                 rdata->bytes    = nr_pages * PAGE_SIZE;
4741                 rdata->cfile    = cifsFileInfo_get(open_file);
4742                 rdata->server   = server;
4743                 rdata->mapping  = ractl->mapping;
4744                 rdata->pid      = pid;
4745                 rdata->credits  = credits_on_stack;
4746
4747                 for (i = 0; i < nr_pages; i++) {
4748                         if (!readahead_folio(ractl))
4749                                 WARN_ON(1);
4750                 }
4751                 ra_pages -= nr_pages;
4752                 ra_index += nr_pages;
4753
4754                 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4755                                 rdata->offset, rdata->bytes);
4756
4757                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4758                 if (!rc) {
4759                         if (rdata->cfile->invalidHandle)
4760                                 rc = -EAGAIN;
4761                         else
4762                                 rc = server->ops->async_readv(rdata);
4763                 }
4764
4765                 if (rc) {
4766                         add_credits_and_wake_if(server, &rdata->credits, 0);
4767                         cifs_unlock_folios(rdata->mapping,
4768                                            rdata->offset / PAGE_SIZE,
4769                                            (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4770                         /* Fallback to the readpage in error/reconnect cases */
4771                         kref_put(&rdata->refcount, cifs_readdata_release);
4772                         break;
4773                 }
4774
4775                 kref_put(&rdata->refcount, cifs_readdata_release);
4776         }
4777
4778         free_xid(xid);
4779 }
4780
4781 /*
4782  * cifs_readpage_worker must be called with the page pinned
4783  */
4784 static int cifs_readpage_worker(struct file *file, struct page *page,
4785         loff_t *poffset)
4786 {
4787         struct inode *inode = file_inode(file);
4788         struct timespec64 atime, mtime;
4789         char *read_data;
4790         int rc;
4791
4792         /* Is the page cached? */
4793         rc = cifs_readpage_from_fscache(inode, page);
4794         if (rc == 0)
4795                 goto read_complete;
4796
4797         read_data = kmap(page);
4798         /* for reads over a certain size could initiate async read ahead */
4799
4800         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4801
4802         if (rc < 0)
4803                 goto io_error;
4804         else
4805                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4806
4807         /* we do not want atime to be less than mtime, it broke some apps */
4808         atime = inode_set_atime_to_ts(inode, current_time(inode));
4809         mtime = inode_get_mtime(inode);
4810         if (timespec64_compare(&atime, &mtime) < 0)
4811                 inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4812
4813         if (PAGE_SIZE > rc)
4814                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4815
4816         flush_dcache_page(page);
4817         SetPageUptodate(page);
4818         rc = 0;
4819
4820 io_error:
4821         kunmap(page);
4822
4823 read_complete:
4824         unlock_page(page);
4825         return rc;
4826 }
4827
4828 static int cifs_read_folio(struct file *file, struct folio *folio)
4829 {
4830         struct page *page = &folio->page;
4831         loff_t offset = page_file_offset(page);
4832         int rc = -EACCES;
4833         unsigned int xid;
4834
4835         xid = get_xid();
4836
4837         if (file->private_data == NULL) {
4838                 rc = -EBADF;
4839                 free_xid(xid);
4840                 return rc;
4841         }
4842
4843         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4844                  page, (int)offset, (int)offset);
4845
4846         rc = cifs_readpage_worker(file, page, &offset);
4847
4848         free_xid(xid);
4849         return rc;
4850 }
4851
4852 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4853 {
4854         struct cifsFileInfo *open_file;
4855
4856         spin_lock(&cifs_inode->open_file_lock);
4857         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4858                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4859                         spin_unlock(&cifs_inode->open_file_lock);
4860                         return 1;
4861                 }
4862         }
4863         spin_unlock(&cifs_inode->open_file_lock);
4864         return 0;
4865 }
4866
4867 /* We do not want to update the file size from server for inodes
4868    open for write - to avoid races with writepage extending
4869    the file - in the future we could consider allowing
4870    refreshing the inode only on increases in the file size
4871    but this is tricky to do without racing with writebehind
4872    page caching in the current Linux kernel design */
4873 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4874                             bool from_readdir)
4875 {
4876         if (!cifsInode)
4877                 return true;
4878
4879         if (is_inode_writable(cifsInode) ||
4880                 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4881                 /* This inode is open for write at least once */
4882                 struct cifs_sb_info *cifs_sb;
4883
4884                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4885                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4886                         /* since no page cache to corrupt on directio
4887                         we can change size safely */
4888                         return true;
4889                 }
4890
4891                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4892                         return true;
4893
4894                 return false;
4895         } else
4896                 return true;
4897 }
4898
4899 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4900                         loff_t pos, unsigned len,
4901                         struct page **pagep, void **fsdata)
4902 {
4903         int oncethru = 0;
4904         pgoff_t index = pos >> PAGE_SHIFT;
4905         loff_t offset = pos & (PAGE_SIZE - 1);
4906         loff_t page_start = pos & PAGE_MASK;
4907         loff_t i_size;
4908         struct page *page;
4909         int rc = 0;
4910
4911         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4912
4913 start:
4914         page = grab_cache_page_write_begin(mapping, index);
4915         if (!page) {
4916                 rc = -ENOMEM;
4917                 goto out;
4918         }
4919
4920         if (PageUptodate(page))
4921                 goto out;
4922
4923         /*
4924          * If we write a full page it will be up to date, no need to read from
4925          * the server. If the write is short, we'll end up doing a sync write
4926          * instead.
4927          */
4928         if (len == PAGE_SIZE)
4929                 goto out;
4930
4931         /*
4932          * optimize away the read when we have an oplock, and we're not
4933          * expecting to use any of the data we'd be reading in. That
4934          * is, when the page lies beyond the EOF, or straddles the EOF
4935          * and the write will cover all of the existing data.
4936          */
4937         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4938                 i_size = i_size_read(mapping->host);
4939                 if (page_start >= i_size ||
4940                     (offset == 0 && (pos + len) >= i_size)) {
4941                         zero_user_segments(page, 0, offset,
4942                                            offset + len,
4943                                            PAGE_SIZE);
4944                         /*
4945                          * PageChecked means that the parts of the page
4946                          * to which we're not writing are considered up
4947                          * to date. Once the data is copied to the
4948                          * page, it can be set uptodate.
4949                          */
4950                         SetPageChecked(page);
4951                         goto out;
4952                 }
4953         }
4954
4955         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4956                 /*
4957                  * might as well read a page, it is fast enough. If we get
4958                  * an error, we don't need to return it. cifs_write_end will
4959                  * do a sync write instead since PG_uptodate isn't set.
4960                  */
4961                 cifs_readpage_worker(file, page, &page_start);
4962                 put_page(page);
4963                 oncethru = 1;
4964                 goto start;
4965         } else {
4966                 /* we could try using another file handle if there is one -
4967                    but how would we lock it to prevent close of that handle
4968                    racing with this read? In any case
4969                    this will be written out by write_end so is fine */
4970         }
4971 out:
4972         *pagep = page;
4973         return rc;
4974 }
4975
4976 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4977 {
4978         if (folio_test_private(folio))
4979                 return 0;
4980         if (folio_test_fscache(folio)) {
4981                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4982                         return false;
4983                 folio_wait_fscache(folio);
4984         }
4985         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4986         return true;
4987 }
4988
4989 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4990                                  size_t length)
4991 {
4992         folio_wait_fscache(folio);
4993 }
4994
4995 static int cifs_launder_folio(struct folio *folio)
4996 {
4997         int rc = 0;
4998         loff_t range_start = folio_pos(folio);
4999         loff_t range_end = range_start + folio_size(folio);
5000         struct writeback_control wbc = {
5001                 .sync_mode = WB_SYNC_ALL,
5002                 .nr_to_write = 0,
5003                 .range_start = range_start,
5004                 .range_end = range_end,
5005         };
5006
5007         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5008
5009         if (folio_clear_dirty_for_io(folio))
5010                 rc = cifs_writepage_locked(&folio->page, &wbc);
5011
5012         folio_wait_fscache(folio);
5013         return rc;
5014 }
5015
5016 void cifs_oplock_break(struct work_struct *work)
5017 {
5018         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5019                                                   oplock_break);
5020         struct inode *inode = d_inode(cfile->dentry);
5021         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5022         struct cifsInodeInfo *cinode = CIFS_I(inode);
5023         struct cifs_tcon *tcon;
5024         struct TCP_Server_Info *server;
5025         struct tcon_link *tlink;
5026         int rc = 0;
5027         bool purge_cache = false, oplock_break_cancelled;
5028         __u64 persistent_fid, volatile_fid;
5029         __u16 net_fid;
5030
5031         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5032                         TASK_UNINTERRUPTIBLE);
5033
5034         tlink = cifs_sb_tlink(cifs_sb);
5035         if (IS_ERR(tlink))
5036                 goto out;
5037         tcon = tlink_tcon(tlink);
5038         server = tcon->ses->server;
5039
5040         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5041                                       cfile->oplock_epoch, &purge_cache);
5042
5043         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5044                                                 cifs_has_mand_locks(cinode)) {
5045                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5046                          inode);
5047                 cinode->oplock = 0;
5048         }
5049
5050         if (inode && S_ISREG(inode->i_mode)) {
5051                 if (CIFS_CACHE_READ(cinode))
5052                         break_lease(inode, O_RDONLY);
5053                 else
5054                         break_lease(inode, O_WRONLY);
5055                 rc = filemap_fdatawrite(inode->i_mapping);
5056                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5057                         rc = filemap_fdatawait(inode->i_mapping);
5058                         mapping_set_error(inode->i_mapping, rc);
5059                         cifs_zap_mapping(inode);
5060                 }
5061                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5062                 if (CIFS_CACHE_WRITE(cinode))
5063                         goto oplock_break_ack;
5064         }
5065
5066         rc = cifs_push_locks(cfile);
5067         if (rc)
5068                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5069
5070 oplock_break_ack:
5071         /*
5072          * When oplock break is received and there are no active
5073          * file handles but cached, then schedule deferred close immediately.
5074          * So, new open will not use cached handle.
5075          */
5076
5077         if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5078                 cifs_close_deferred_file(cinode);
5079
5080         persistent_fid = cfile->fid.persistent_fid;
5081         volatile_fid = cfile->fid.volatile_fid;
5082         net_fid = cfile->fid.netfid;
5083         oplock_break_cancelled = cfile->oplock_break_cancelled;
5084
5085         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5086         /*
5087          * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5088          * an acknowledgment to be sent when the file has already been closed.
5089          */
5090         spin_lock(&cinode->open_file_lock);
5091         /* check list empty since can race with kill_sb calling tree disconnect */
5092         if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5093                 spin_unlock(&cinode->open_file_lock);
5094                 rc = server->ops->oplock_response(tcon, persistent_fid,
5095                                                   volatile_fid, net_fid, cinode);
5096                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5097         } else
5098                 spin_unlock(&cinode->open_file_lock);
5099
5100         cifs_put_tlink(tlink);
5101 out:
5102         cifs_done_oplock_break(cinode);
5103 }
5104
5105 /*
5106  * The presence of cifs_direct_io() in the address space ops vector
5107  * allowes open() O_DIRECT flags which would have failed otherwise.
5108  *
5109  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5110  * so this method should never be called.
5111  *
5112  * Direct IO is not yet supported in the cached mode.
5113  */
5114 static ssize_t
5115 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5116 {
5117         /*
5118          * FIXME
5119          * Eventually need to support direct IO for non forcedirectio mounts
5120          */
5121         return -EINVAL;
5122 }
5123
5124 static int cifs_swap_activate(struct swap_info_struct *sis,
5125                               struct file *swap_file, sector_t *span)
5126 {
5127         struct cifsFileInfo *cfile = swap_file->private_data;
5128         struct inode *inode = swap_file->f_mapping->host;
5129         unsigned long blocks;
5130         long long isize;
5131
5132         cifs_dbg(FYI, "swap activate\n");
5133
5134         if (!swap_file->f_mapping->a_ops->swap_rw)
5135                 /* Cannot support swap */
5136                 return -EINVAL;
5137
5138         spin_lock(&inode->i_lock);
5139         blocks = inode->i_blocks;
5140         isize = inode->i_size;
5141         spin_unlock(&inode->i_lock);
5142         if (blocks*512 < isize) {
5143                 pr_warn("swap activate: swapfile has holes\n");
5144                 return -EINVAL;
5145         }
5146         *span = sis->pages;
5147
5148         pr_warn_once("Swap support over SMB3 is experimental\n");
5149
5150         /*
5151          * TODO: consider adding ACL (or documenting how) to prevent other
5152          * users (on this or other systems) from reading it
5153          */
5154
5155
5156         /* TODO: add sk_set_memalloc(inet) or similar */
5157
5158         if (cfile)
5159                 cfile->swapfile = true;
5160         /*
5161          * TODO: Since file already open, we can't open with DENY_ALL here
5162          * but we could add call to grab a byte range lock to prevent others
5163          * from reading or writing the file
5164          */
5165
5166         sis->flags |= SWP_FS_OPS;
5167         return add_swap_extent(sis, 0, sis->max, 0);
5168 }
5169
5170 static void cifs_swap_deactivate(struct file *file)
5171 {
5172         struct cifsFileInfo *cfile = file->private_data;
5173
5174         cifs_dbg(FYI, "swap deactivate\n");
5175
5176         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5177
5178         if (cfile)
5179                 cfile->swapfile = false;
5180
5181         /* do we need to unpin (or unlock) the file */
5182 }
5183
5184 const struct address_space_operations cifs_addr_ops = {
5185         .read_folio = cifs_read_folio,
5186         .readahead = cifs_readahead,
5187         .writepages = cifs_writepages,
5188         .write_begin = cifs_write_begin,
5189         .write_end = cifs_write_end,
5190         .dirty_folio = netfs_dirty_folio,
5191         .release_folio = cifs_release_folio,
5192         .direct_IO = cifs_direct_io,
5193         .invalidate_folio = cifs_invalidate_folio,
5194         .launder_folio = cifs_launder_folio,
5195         .migrate_folio = filemap_migrate_folio,
5196         /*
5197          * TODO: investigate and if useful we could add an is_dirty_writeback
5198          * helper if needed
5199          */
5200         .swap_activate = cifs_swap_activate,
5201         .swap_deactivate = cifs_swap_deactivate,
5202 };
5203
5204 /*
5205  * cifs_readahead requires the server to support a buffer large enough to
5206  * contain the header plus one complete page of data.  Otherwise, we need
5207  * to leave cifs_readahead out of the address space operations.
5208  */
5209 const struct address_space_operations cifs_addr_ops_smallbuf = {
5210         .read_folio = cifs_read_folio,
5211         .writepages = cifs_writepages,
5212         .write_begin = cifs_write_begin,
5213         .write_end = cifs_write_end,
5214         .dirty_folio = netfs_dirty_folio,
5215         .release_folio = cifs_release_folio,
5216         .invalidate_folio = cifs_invalidate_folio,
5217         .launder_folio = cifs_launder_folio,
5218         .migrate_folio = filemap_migrate_folio,
5219 };