Merge tag 'netfs-prep-20220318' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / fs / cachefiles / io.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* kiocb-using read/write
3  *
4  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/falloc.h>
13 #include <linux/sched/mm.h>
14 #include <trace/events/fscache.h>
15 #include "internal.h"
16
17 struct cachefiles_kiocb {
18         struct kiocb            iocb;
19         refcount_t              ki_refcnt;
20         loff_t                  start;
21         union {
22                 size_t          skipped;
23                 size_t          len;
24         };
25         struct cachefiles_object *object;
26         netfs_io_terminated_t   term_func;
27         void                    *term_func_priv;
28         bool                    was_async;
29         unsigned int            inval_counter;  /* Copy of cookie->inval_counter */
30         u64                     b_writing;
31 };
32
33 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
34 {
35         if (refcount_dec_and_test(&ki->ki_refcnt)) {
36                 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
37                 fput(ki->iocb.ki_filp);
38                 kfree(ki);
39         }
40 }
41
42 /*
43  * Handle completion of a read from the cache.
44  */
45 static void cachefiles_read_complete(struct kiocb *iocb, long ret)
46 {
47         struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48         struct inode *inode = file_inode(ki->iocb.ki_filp);
49
50         _enter("%ld", ret);
51
52         if (ret < 0)
53                 trace_cachefiles_io_error(ki->object, inode, ret,
54                                           cachefiles_trace_read_error);
55
56         if (ki->term_func) {
57                 if (ret >= 0) {
58                         if (ki->object->cookie->inval_counter == ki->inval_counter)
59                                 ki->skipped += ret;
60                         else
61                                 ret = -ESTALE;
62                 }
63
64                 ki->term_func(ki->term_func_priv, ret, ki->was_async);
65         }
66
67         cachefiles_put_kiocb(ki);
68 }
69
70 /*
71  * Initiate a read from the cache.
72  */
73 static int cachefiles_read(struct netfs_cache_resources *cres,
74                            loff_t start_pos,
75                            struct iov_iter *iter,
76                            enum netfs_read_from_hole read_hole,
77                            netfs_io_terminated_t term_func,
78                            void *term_func_priv)
79 {
80         struct cachefiles_object *object;
81         struct cachefiles_kiocb *ki;
82         struct file *file;
83         unsigned int old_nofs;
84         ssize_t ret = -ENOBUFS;
85         size_t len = iov_iter_count(iter), skipped = 0;
86
87         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
88                 goto presubmission_error;
89
90         fscache_count_read();
91         object = cachefiles_cres_object(cres);
92         file = cachefiles_cres_file(cres);
93
94         _enter("%pD,%li,%llx,%zx/%llx",
95                file, file_inode(file)->i_ino, start_pos, len,
96                i_size_read(file_inode(file)));
97
98         /* If the caller asked us to seek for data before doing the read, then
99          * we should do that now.  If we find a gap, we fill it with zeros.
100          */
101         if (read_hole != NETFS_READ_HOLE_IGNORE) {
102                 loff_t off = start_pos, off2;
103
104                 off2 = cachefiles_inject_read_error();
105                 if (off2 == 0)
106                         off2 = vfs_llseek(file, off, SEEK_DATA);
107                 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
108                         skipped = 0;
109                         ret = off2;
110                         goto presubmission_error;
111                 }
112
113                 if (off2 == -ENXIO || off2 >= start_pos + len) {
114                         /* The region is beyond the EOF or there's no more data
115                          * in the region, so clear the rest of the buffer and
116                          * return success.
117                          */
118                         ret = -ENODATA;
119                         if (read_hole == NETFS_READ_HOLE_FAIL)
120                                 goto presubmission_error;
121
122                         iov_iter_zero(len, iter);
123                         skipped = len;
124                         ret = 0;
125                         goto presubmission_error;
126                 }
127
128                 skipped = off2 - off;
129                 iov_iter_zero(skipped, iter);
130         }
131
132         ret = -ENOMEM;
133         ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
134         if (!ki)
135                 goto presubmission_error;
136
137         refcount_set(&ki->ki_refcnt, 2);
138         ki->iocb.ki_filp        = file;
139         ki->iocb.ki_pos         = start_pos + skipped;
140         ki->iocb.ki_flags       = IOCB_DIRECT;
141         ki->iocb.ki_ioprio      = get_current_ioprio();
142         ki->skipped             = skipped;
143         ki->object              = object;
144         ki->inval_counter       = cres->inval_counter;
145         ki->term_func           = term_func;
146         ki->term_func_priv      = term_func_priv;
147         ki->was_async           = true;
148
149         if (ki->term_func)
150                 ki->iocb.ki_complete = cachefiles_read_complete;
151
152         get_file(ki->iocb.ki_filp);
153         cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
154
155         trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
156         old_nofs = memalloc_nofs_save();
157         ret = cachefiles_inject_read_error();
158         if (ret == 0)
159                 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
160         memalloc_nofs_restore(old_nofs);
161         switch (ret) {
162         case -EIOCBQUEUED:
163                 goto in_progress;
164
165         case -ERESTARTSYS:
166         case -ERESTARTNOINTR:
167         case -ERESTARTNOHAND:
168         case -ERESTART_RESTARTBLOCK:
169                 /* There's no easy way to restart the syscall since other AIO's
170                  * may be already running. Just fail this IO with EINTR.
171                  */
172                 ret = -EINTR;
173                 fallthrough;
174         default:
175                 ki->was_async = false;
176                 cachefiles_read_complete(&ki->iocb, ret);
177                 if (ret > 0)
178                         ret = 0;
179                 break;
180         }
181
182 in_progress:
183         cachefiles_put_kiocb(ki);
184         _leave(" = %zd", ret);
185         return ret;
186
187 presubmission_error:
188         if (term_func)
189                 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
190         return ret;
191 }
192
193 /*
194  * Query the occupancy of the cache in a region, returning where the next chunk
195  * of data starts and how long it is.
196  */
197 static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
198                                       loff_t start, size_t len, size_t granularity,
199                                       loff_t *_data_start, size_t *_data_len)
200 {
201         struct cachefiles_object *object;
202         struct file *file;
203         loff_t off, off2;
204
205         *_data_start = -1;
206         *_data_len = 0;
207
208         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
209                 return -ENOBUFS;
210
211         object = cachefiles_cres_object(cres);
212         file = cachefiles_cres_file(cres);
213         granularity = max_t(size_t, object->volume->cache->bsize, granularity);
214
215         _enter("%pD,%li,%llx,%zx/%llx",
216                file, file_inode(file)->i_ino, start, len,
217                i_size_read(file_inode(file)));
218
219         off = cachefiles_inject_read_error();
220         if (off == 0)
221                 off = vfs_llseek(file, start, SEEK_DATA);
222         if (off == -ENXIO)
223                 return -ENODATA; /* Beyond EOF */
224         if (off < 0 && off >= (loff_t)-MAX_ERRNO)
225                 return -ENOBUFS; /* Error. */
226         if (round_up(off, granularity) >= start + len)
227                 return -ENODATA; /* No data in range */
228
229         off2 = cachefiles_inject_read_error();
230         if (off2 == 0)
231                 off2 = vfs_llseek(file, off, SEEK_HOLE);
232         if (off2 == -ENXIO)
233                 return -ENODATA; /* Beyond EOF */
234         if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
235                 return -ENOBUFS; /* Error. */
236
237         /* Round away partial blocks */
238         off = round_up(off, granularity);
239         off2 = round_down(off2, granularity);
240         if (off2 <= off)
241                 return -ENODATA;
242
243         *_data_start = off;
244         if (off2 > start + len)
245                 *_data_len = len;
246         else
247                 *_data_len = off2 - off;
248         return 0;
249 }
250
251 /*
252  * Handle completion of a write to the cache.
253  */
254 static void cachefiles_write_complete(struct kiocb *iocb, long ret)
255 {
256         struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
257         struct cachefiles_object *object = ki->object;
258         struct inode *inode = file_inode(ki->iocb.ki_filp);
259
260         _enter("%ld", ret);
261
262         /* Tell lockdep we inherited freeze protection from submission thread */
263         __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
264         __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
265
266         if (ret < 0)
267                 trace_cachefiles_io_error(object, inode, ret,
268                                           cachefiles_trace_write_error);
269
270         atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
271         set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
272         if (ki->term_func)
273                 ki->term_func(ki->term_func_priv, ret, ki->was_async);
274         cachefiles_put_kiocb(ki);
275 }
276
277 /*
278  * Initiate a write to the cache.
279  */
280 static int cachefiles_write(struct netfs_cache_resources *cres,
281                             loff_t start_pos,
282                             struct iov_iter *iter,
283                             netfs_io_terminated_t term_func,
284                             void *term_func_priv)
285 {
286         struct cachefiles_object *object;
287         struct cachefiles_cache *cache;
288         struct cachefiles_kiocb *ki;
289         struct inode *inode;
290         struct file *file;
291         unsigned int old_nofs;
292         ssize_t ret = -ENOBUFS;
293         size_t len = iov_iter_count(iter);
294
295         if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
296                 goto presubmission_error;
297         fscache_count_write();
298         object = cachefiles_cres_object(cres);
299         cache = object->volume->cache;
300         file = cachefiles_cres_file(cres);
301
302         _enter("%pD,%li,%llx,%zx/%llx",
303                file, file_inode(file)->i_ino, start_pos, len,
304                i_size_read(file_inode(file)));
305
306         ret = -ENOMEM;
307         ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
308         if (!ki)
309                 goto presubmission_error;
310
311         refcount_set(&ki->ki_refcnt, 2);
312         ki->iocb.ki_filp        = file;
313         ki->iocb.ki_pos         = start_pos;
314         ki->iocb.ki_flags       = IOCB_DIRECT | IOCB_WRITE;
315         ki->iocb.ki_ioprio      = get_current_ioprio();
316         ki->object              = object;
317         ki->inval_counter       = cres->inval_counter;
318         ki->start               = start_pos;
319         ki->len                 = len;
320         ki->term_func           = term_func;
321         ki->term_func_priv      = term_func_priv;
322         ki->was_async           = true;
323         ki->b_writing           = (len + (1 << cache->bshift) - 1) >> cache->bshift;
324
325         if (ki->term_func)
326                 ki->iocb.ki_complete = cachefiles_write_complete;
327         atomic_long_add(ki->b_writing, &cache->b_writing);
328
329         /* Open-code file_start_write here to grab freeze protection, which
330          * will be released by another thread in aio_complete_rw().  Fool
331          * lockdep by telling it the lock got released so that it doesn't
332          * complain about the held lock when we return to userspace.
333          */
334         inode = file_inode(file);
335         __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
336         __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
337
338         get_file(ki->iocb.ki_filp);
339         cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
340
341         trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
342         old_nofs = memalloc_nofs_save();
343         ret = cachefiles_inject_write_error();
344         if (ret == 0)
345                 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
346         memalloc_nofs_restore(old_nofs);
347         switch (ret) {
348         case -EIOCBQUEUED:
349                 goto in_progress;
350
351         case -ERESTARTSYS:
352         case -ERESTARTNOINTR:
353         case -ERESTARTNOHAND:
354         case -ERESTART_RESTARTBLOCK:
355                 /* There's no easy way to restart the syscall since other AIO's
356                  * may be already running. Just fail this IO with EINTR.
357                  */
358                 ret = -EINTR;
359                 fallthrough;
360         default:
361                 ki->was_async = false;
362                 cachefiles_write_complete(&ki->iocb, ret);
363                 if (ret > 0)
364                         ret = 0;
365                 break;
366         }
367
368 in_progress:
369         cachefiles_put_kiocb(ki);
370         _leave(" = %zd", ret);
371         return ret;
372
373 presubmission_error:
374         if (term_func)
375                 term_func(term_func_priv, ret, false);
376         return ret;
377 }
378
379 /*
380  * Prepare a read operation, shortening it to a cached/uncached
381  * boundary as appropriate.
382  */
383 static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
384                                                       loff_t i_size)
385 {
386         enum cachefiles_prepare_read_trace why;
387         struct netfs_io_request *rreq = subreq->rreq;
388         struct netfs_cache_resources *cres = &rreq->cache_resources;
389         struct cachefiles_object *object;
390         struct cachefiles_cache *cache;
391         struct fscache_cookie *cookie = fscache_cres_cookie(cres);
392         const struct cred *saved_cred;
393         struct file *file = cachefiles_cres_file(cres);
394         enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
395         loff_t off, to;
396         ino_t ino = file ? file_inode(file)->i_ino : 0;
397
398         _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
399
400         if (subreq->start >= i_size) {
401                 ret = NETFS_FILL_WITH_ZEROES;
402                 why = cachefiles_trace_read_after_eof;
403                 goto out_no_object;
404         }
405
406         if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
407                 __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
408                 why = cachefiles_trace_read_no_data;
409                 goto out_no_object;
410         }
411
412         /* The object and the file may be being created in the background. */
413         if (!file) {
414                 why = cachefiles_trace_read_no_file;
415                 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
416                         goto out_no_object;
417                 file = cachefiles_cres_file(cres);
418                 if (!file)
419                         goto out_no_object;
420                 ino = file_inode(file)->i_ino;
421         }
422
423         object = cachefiles_cres_object(cres);
424         cache = object->volume->cache;
425         cachefiles_begin_secure(cache, &saved_cred);
426
427         off = cachefiles_inject_read_error();
428         if (off == 0)
429                 off = vfs_llseek(file, subreq->start, SEEK_DATA);
430         if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
431                 if (off == (loff_t)-ENXIO) {
432                         why = cachefiles_trace_read_seek_nxio;
433                         goto download_and_store;
434                 }
435                 trace_cachefiles_io_error(object, file_inode(file), off,
436                                           cachefiles_trace_seek_error);
437                 why = cachefiles_trace_read_seek_error;
438                 goto out;
439         }
440
441         if (off >= subreq->start + subreq->len) {
442                 why = cachefiles_trace_read_found_hole;
443                 goto download_and_store;
444         }
445
446         if (off > subreq->start) {
447                 off = round_up(off, cache->bsize);
448                 subreq->len = off - subreq->start;
449                 why = cachefiles_trace_read_found_part;
450                 goto download_and_store;
451         }
452
453         to = cachefiles_inject_read_error();
454         if (to == 0)
455                 to = vfs_llseek(file, subreq->start, SEEK_HOLE);
456         if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
457                 trace_cachefiles_io_error(object, file_inode(file), to,
458                                           cachefiles_trace_seek_error);
459                 why = cachefiles_trace_read_seek_error;
460                 goto out;
461         }
462
463         if (to < subreq->start + subreq->len) {
464                 if (subreq->start + subreq->len >= i_size)
465                         to = round_up(to, cache->bsize);
466                 else
467                         to = round_down(to, cache->bsize);
468                 subreq->len = to - subreq->start;
469         }
470
471         why = cachefiles_trace_read_have_data;
472         ret = NETFS_READ_FROM_CACHE;
473         goto out;
474
475 download_and_store:
476         __set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
477 out:
478         cachefiles_end_secure(cache, saved_cred);
479 out_no_object:
480         trace_cachefiles_prep_read(subreq, ret, why, ino);
481         return ret;
482 }
483
484 /*
485  * Prepare for a write to occur.
486  */
487 static int __cachefiles_prepare_write(struct netfs_cache_resources *cres,
488                                       loff_t *_start, size_t *_len, loff_t i_size,
489                                       bool no_space_allocated_yet)
490 {
491         struct cachefiles_object *object = cachefiles_cres_object(cres);
492         struct cachefiles_cache *cache = object->volume->cache;
493         struct file *file = cachefiles_cres_file(cres);
494         loff_t start = *_start, pos;
495         size_t len = *_len, down;
496         int ret;
497
498         /* Round to DIO size */
499         down = start - round_down(start, PAGE_SIZE);
500         *_start = start - down;
501         *_len = round_up(down + len, PAGE_SIZE);
502
503         /* We need to work out whether there's sufficient disk space to perform
504          * the write - but we can skip that check if we have space already
505          * allocated.
506          */
507         if (no_space_allocated_yet)
508                 goto check_space;
509
510         pos = cachefiles_inject_read_error();
511         if (pos == 0)
512                 pos = vfs_llseek(file, *_start, SEEK_DATA);
513         if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
514                 if (pos == -ENXIO)
515                         goto check_space; /* Unallocated tail */
516                 trace_cachefiles_io_error(object, file_inode(file), pos,
517                                           cachefiles_trace_seek_error);
518                 return pos;
519         }
520         if ((u64)pos >= (u64)*_start + *_len)
521                 goto check_space; /* Unallocated region */
522
523         /* We have a block that's at least partially filled - if we're low on
524          * space, we need to see if it's fully allocated.  If it's not, we may
525          * want to cull it.
526          */
527         if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
528                                  cachefiles_has_space_check) == 0)
529                 return 0; /* Enough space to simply overwrite the whole block */
530
531         pos = cachefiles_inject_read_error();
532         if (pos == 0)
533                 pos = vfs_llseek(file, *_start, SEEK_HOLE);
534         if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
535                 trace_cachefiles_io_error(object, file_inode(file), pos,
536                                           cachefiles_trace_seek_error);
537                 return pos;
538         }
539         if ((u64)pos >= (u64)*_start + *_len)
540                 return 0; /* Fully allocated */
541
542         /* Partially allocated, but insufficient space: cull. */
543         fscache_count_no_write_space();
544         ret = cachefiles_inject_remove_error();
545         if (ret == 0)
546                 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
547                                     *_start, *_len);
548         if (ret < 0) {
549                 trace_cachefiles_io_error(object, file_inode(file), ret,
550                                           cachefiles_trace_fallocate_error);
551                 cachefiles_io_error_obj(object,
552                                         "CacheFiles: fallocate failed (%d)\n", ret);
553                 ret = -EIO;
554         }
555
556         return ret;
557
558 check_space:
559         return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
560                                     cachefiles_has_space_for_write);
561 }
562
563 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
564                                     loff_t *_start, size_t *_len, loff_t i_size,
565                                     bool no_space_allocated_yet)
566 {
567         struct cachefiles_object *object = cachefiles_cres_object(cres);
568         struct cachefiles_cache *cache = object->volume->cache;
569         const struct cred *saved_cred;
570         int ret;
571
572         if (!cachefiles_cres_file(cres)) {
573                 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
574                         return -ENOBUFS;
575                 if (!cachefiles_cres_file(cres))
576                         return -ENOBUFS;
577         }
578
579         cachefiles_begin_secure(cache, &saved_cred);
580         ret = __cachefiles_prepare_write(cres, _start, _len, i_size,
581                                          no_space_allocated_yet);
582         cachefiles_end_secure(cache, saved_cred);
583         return ret;
584 }
585
586 /*
587  * Clean up an operation.
588  */
589 static void cachefiles_end_operation(struct netfs_cache_resources *cres)
590 {
591         struct file *file = cachefiles_cres_file(cres);
592
593         if (file)
594                 fput(file);
595         fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
596 }
597
598 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
599         .end_operation          = cachefiles_end_operation,
600         .read                   = cachefiles_read,
601         .write                  = cachefiles_write,
602         .prepare_read           = cachefiles_prepare_read,
603         .prepare_write          = cachefiles_prepare_write,
604         .query_occupancy        = cachefiles_query_occupancy,
605 };
606
607 /*
608  * Open the cache file when beginning a cache operation.
609  */
610 bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
611                                 enum fscache_want_state want_state)
612 {
613         struct cachefiles_object *object = cachefiles_cres_object(cres);
614
615         if (!cachefiles_cres_file(cres)) {
616                 cres->ops = &cachefiles_netfs_cache_ops;
617                 if (object->file) {
618                         spin_lock(&object->lock);
619                         if (!cres->cache_priv2 && object->file)
620                                 cres->cache_priv2 = get_file(object->file);
621                         spin_unlock(&object->lock);
622                 }
623         }
624
625         if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
626                 pr_err("failed to get cres->file\n");
627                 return false;
628         }
629
630         return true;
631 }