Merge tag 'fuse-update-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/mszered...
[sfrench/cifs-2.6.git] / fs / fuse / readdir.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18         struct fuse_conn *fc = get_fuse_conn(dir);
19         struct fuse_inode *fi = get_fuse_inode(dir);
20
21         if (!fc->do_readdirplus)
22                 return false;
23         if (!fc->readdirplus_auto)
24                 return true;
25         if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26                 return true;
27         if (ctx->pos == 0)
28                 return true;
29         return false;
30 }
31
32 static void fuse_add_dirent_to_cache(struct file *file,
33                                      struct fuse_dirent *dirent, loff_t pos)
34 {
35         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36         size_t reclen = FUSE_DIRENT_SIZE(dirent);
37         pgoff_t index;
38         struct page *page;
39         loff_t size;
40         u64 version;
41         unsigned int offset;
42         void *addr;
43
44         spin_lock(&fi->rdc.lock);
45         /*
46          * Is cache already completed?  Or this entry does not go at the end of
47          * cache?
48          */
49         if (fi->rdc.cached || pos != fi->rdc.pos) {
50                 spin_unlock(&fi->rdc.lock);
51                 return;
52         }
53         version = fi->rdc.version;
54         size = fi->rdc.size;
55         offset = size & ~PAGE_MASK;
56         index = size >> PAGE_SHIFT;
57         /* Dirent doesn't fit in current page?  Jump to next page. */
58         if (offset + reclen > PAGE_SIZE) {
59                 index++;
60                 offset = 0;
61         }
62         spin_unlock(&fi->rdc.lock);
63
64         if (offset) {
65                 page = find_lock_page(file->f_mapping, index);
66         } else {
67                 page = find_or_create_page(file->f_mapping, index,
68                                            mapping_gfp_mask(file->f_mapping));
69         }
70         if (!page)
71                 return;
72
73         spin_lock(&fi->rdc.lock);
74         /* Raced with another readdir */
75         if (fi->rdc.version != version || fi->rdc.size != size ||
76             WARN_ON(fi->rdc.pos != pos))
77                 goto unlock;
78
79         addr = kmap_atomic(page);
80         if (!offset)
81                 clear_page(addr);
82         memcpy(addr + offset, dirent, reclen);
83         kunmap_atomic(addr);
84         fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
85         fi->rdc.pos = dirent->off;
86 unlock:
87         spin_unlock(&fi->rdc.lock);
88         unlock_page(page);
89         put_page(page);
90 }
91
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93 {
94         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95         loff_t end;
96
97         spin_lock(&fi->rdc.lock);
98         /* does cache end position match current position? */
99         if (fi->rdc.pos != pos) {
100                 spin_unlock(&fi->rdc.lock);
101                 return;
102         }
103
104         fi->rdc.cached = true;
105         end = ALIGN(fi->rdc.size, PAGE_SIZE);
106         spin_unlock(&fi->rdc.lock);
107
108         /* truncate unused tail of cache */
109         truncate_inode_pages(file->f_mapping, end);
110 }
111
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113                       struct fuse_dirent *dirent)
114 {
115         struct fuse_file *ff = file->private_data;
116
117         if (ff->open_flags & FOPEN_CACHE_DIR)
118                 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119
120         return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121                         dirent->type);
122 }
123
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125                          struct dir_context *ctx)
126 {
127         while (nbytes >= FUSE_NAME_OFFSET) {
128                 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129                 size_t reclen = FUSE_DIRENT_SIZE(dirent);
130                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131                         return -EIO;
132                 if (reclen > nbytes)
133                         break;
134                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135                         return -EIO;
136
137                 if (!fuse_emit(file, ctx, dirent))
138                         break;
139
140                 buf += reclen;
141                 nbytes -= reclen;
142                 ctx->pos = dirent->off;
143         }
144
145         return 0;
146 }
147
148 static int fuse_direntplus_link(struct file *file,
149                                 struct fuse_direntplus *direntplus,
150                                 u64 attr_version)
151 {
152         struct fuse_entry_out *o = &direntplus->entry_out;
153         struct fuse_dirent *dirent = &direntplus->dirent;
154         struct dentry *parent = file->f_path.dentry;
155         struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156         struct dentry *dentry;
157         struct dentry *alias;
158         struct inode *dir = d_inode(parent);
159         struct fuse_conn *fc;
160         struct inode *inode;
161         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162
163         if (!o->nodeid) {
164                 /*
165                  * Unlike in the case of fuse_lookup, zero nodeid does not mean
166                  * ENOENT. Instead, it only means the userspace filesystem did
167                  * not want to return attributes/handle for this entry.
168                  *
169                  * So do nothing.
170                  */
171                 return 0;
172         }
173
174         if (name.name[0] == '.') {
175                 /*
176                  * We could potentially refresh the attributes of the directory
177                  * and its parent?
178                  */
179                 if (name.len == 1)
180                         return 0;
181                 if (name.name[1] == '.' && name.len == 2)
182                         return 0;
183         }
184
185         if (invalid_nodeid(o->nodeid))
186                 return -EIO;
187         if (!fuse_valid_type(o->attr.mode))
188                 return -EIO;
189
190         fc = get_fuse_conn(dir);
191
192         name.hash = full_name_hash(parent, name.name, name.len);
193         dentry = d_lookup(parent, &name);
194         if (!dentry) {
195 retry:
196                 dentry = d_alloc_parallel(parent, &name, &wq);
197                 if (IS_ERR(dentry))
198                         return PTR_ERR(dentry);
199         }
200         if (!d_in_lookup(dentry)) {
201                 struct fuse_inode *fi;
202                 inode = d_inode(dentry);
203                 if (!inode ||
204                     get_node_id(inode) != o->nodeid ||
205                     ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206                         d_invalidate(dentry);
207                         dput(dentry);
208                         goto retry;
209                 }
210                 if (is_bad_inode(inode)) {
211                         dput(dentry);
212                         return -EIO;
213                 }
214
215                 fi = get_fuse_inode(inode);
216                 spin_lock(&fc->lock);
217                 fi->nlookup++;
218                 spin_unlock(&fc->lock);
219
220                 forget_all_cached_acls(inode);
221                 fuse_change_attributes(inode, &o->attr,
222                                        entry_attr_timeout(o),
223                                        attr_version);
224                 /*
225                  * The other branch comes via fuse_iget()
226                  * which bumps nlookup inside
227                  */
228         } else {
229                 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230                                   &o->attr, entry_attr_timeout(o),
231                                   attr_version);
232                 if (!inode)
233                         inode = ERR_PTR(-ENOMEM);
234
235                 alias = d_splice_alias(inode, dentry);
236                 d_lookup_done(dentry);
237                 if (alias) {
238                         dput(dentry);
239                         dentry = alias;
240                 }
241                 if (IS_ERR(dentry))
242                         return PTR_ERR(dentry);
243         }
244         if (fc->readdirplus_auto)
245                 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
246         fuse_change_entry_timeout(dentry, o);
247
248         dput(dentry);
249         return 0;
250 }
251
252 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
253                              struct dir_context *ctx, u64 attr_version)
254 {
255         struct fuse_direntplus *direntplus;
256         struct fuse_dirent *dirent;
257         size_t reclen;
258         int over = 0;
259         int ret;
260
261         while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
262                 direntplus = (struct fuse_direntplus *) buf;
263                 dirent = &direntplus->dirent;
264                 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
265
266                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
267                         return -EIO;
268                 if (reclen > nbytes)
269                         break;
270                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
271                         return -EIO;
272
273                 if (!over) {
274                         /* We fill entries into dstbuf only as much as
275                            it can hold. But we still continue iterating
276                            over remaining entries to link them. If not,
277                            we need to send a FORGET for each of those
278                            which we did not link.
279                         */
280                         over = !fuse_emit(file, ctx, dirent);
281                         if (!over)
282                                 ctx->pos = dirent->off;
283                 }
284
285                 buf += reclen;
286                 nbytes -= reclen;
287
288                 ret = fuse_direntplus_link(file, direntplus, attr_version);
289                 if (ret)
290                         fuse_force_forget(file, direntplus->entry_out.nodeid);
291         }
292
293         return 0;
294 }
295
296 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
297 {
298         int plus, err;
299         size_t nbytes;
300         struct page *page;
301         struct inode *inode = file_inode(file);
302         struct fuse_conn *fc = get_fuse_conn(inode);
303         struct fuse_req *req;
304         u64 attr_version = 0;
305         bool locked;
306
307         req = fuse_get_req(fc, 1);
308         if (IS_ERR(req))
309                 return PTR_ERR(req);
310
311         page = alloc_page(GFP_KERNEL);
312         if (!page) {
313                 fuse_put_request(fc, req);
314                 return -ENOMEM;
315         }
316
317         plus = fuse_use_readdirplus(inode, ctx);
318         req->out.argpages = 1;
319         req->num_pages = 1;
320         req->pages[0] = page;
321         req->page_descs[0].length = PAGE_SIZE;
322         if (plus) {
323                 attr_version = fuse_get_attr_version(fc);
324                 fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
325                                FUSE_READDIRPLUS);
326         } else {
327                 fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
328                                FUSE_READDIR);
329         }
330         locked = fuse_lock_inode(inode);
331         fuse_request_send(fc, req);
332         fuse_unlock_inode(inode, locked);
333         nbytes = req->out.args[0].size;
334         err = req->out.h.error;
335         fuse_put_request(fc, req);
336         if (!err) {
337                 if (!nbytes) {
338                         struct fuse_file *ff = file->private_data;
339
340                         if (ff->open_flags & FOPEN_CACHE_DIR)
341                                 fuse_readdir_cache_end(file, ctx->pos);
342                 } else if (plus) {
343                         err = parse_dirplusfile(page_address(page), nbytes,
344                                                 file, ctx, attr_version);
345                 } else {
346                         err = parse_dirfile(page_address(page), nbytes, file,
347                                             ctx);
348                 }
349         }
350
351         __free_page(page);
352         fuse_invalidate_atime(inode);
353         return err;
354 }
355
/* Outcome of scanning one page of the readdir cache (fuse_parse_cache()). */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed validation */
	FOUND_NONE = 0,		/* nothing was emitted from this page */
	FOUND_SOME = 1,		/* the requested position was found in this page */
	FOUND_ALL  = 2,		/* dir_emit() refused an entry; stop reading */
};
362
363 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
364                                                void *addr, unsigned int size,
365                                                struct dir_context *ctx)
366 {
367         unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
368         enum fuse_parse_result res = FOUND_NONE;
369
370         WARN_ON(offset >= size);
371
372         for (;;) {
373                 struct fuse_dirent *dirent = addr + offset;
374                 unsigned int nbytes = size - offset;
375                 size_t reclen = FUSE_DIRENT_SIZE(dirent);
376
377                 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
378                         break;
379
380                 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
381                         return FOUND_ERR;
382                 if (WARN_ON(reclen > nbytes))
383                         return FOUND_ERR;
384                 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
385                         return FOUND_ERR;
386
387                 if (ff->readdir.pos == ctx->pos) {
388                         res = FOUND_SOME;
389                         if (!dir_emit(ctx, dirent->name, dirent->namelen,
390                                       dirent->ino, dirent->type))
391                                 return FOUND_ALL;
392                         ctx->pos = dirent->off;
393                 }
394                 ff->readdir.pos = dirent->off;
395                 ff->readdir.cache_off += reclen;
396
397                 offset += reclen;
398         }
399
400         return res;
401 }
402
403 static void fuse_rdc_reset(struct inode *inode)
404 {
405         struct fuse_inode *fi = get_fuse_inode(inode);
406
407         fi->rdc.cached = false;
408         fi->rdc.version++;
409         fi->rdc.size = 0;
410         fi->rdc.pos = 0;
411 }
412
413 #define UNCACHED 1
414
415 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
416 {
417         struct fuse_file *ff = file->private_data;
418         struct inode *inode = file_inode(file);
419         struct fuse_conn *fc = get_fuse_conn(inode);
420         struct fuse_inode *fi = get_fuse_inode(inode);
421         enum fuse_parse_result res;
422         pgoff_t index;
423         unsigned int size;
424         struct page *page;
425         void *addr;
426
427         /* Seeked?  If so, reset the cache stream */
428         if (ff->readdir.pos != ctx->pos) {
429                 ff->readdir.pos = 0;
430                 ff->readdir.cache_off = 0;
431         }
432
433         /*
434          * We're just about to start reading into the cache or reading the
435          * cache; both cases require an up-to-date mtime value.
436          */
437         if (!ctx->pos && fc->auto_inval_data) {
438                 int err = fuse_update_attributes(inode, file);
439
440                 if (err)
441                         return err;
442         }
443
444 retry:
445         spin_lock(&fi->rdc.lock);
446 retry_locked:
447         if (!fi->rdc.cached) {
448                 /* Starting cache? Set cache mtime. */
449                 if (!ctx->pos && !fi->rdc.size) {
450                         fi->rdc.mtime = inode->i_mtime;
451                         fi->rdc.iversion = inode_query_iversion(inode);
452                 }
453                 spin_unlock(&fi->rdc.lock);
454                 return UNCACHED;
455         }
456         /*
457          * When at the beginning of the directory (i.e. just after opendir(3) or
458          * rewinddir(3)), then need to check whether directory contents have
459          * changed, and reset the cache if so.
460          */
461         if (!ctx->pos) {
462                 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
463                     !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
464                         fuse_rdc_reset(inode);
465                         goto retry_locked;
466                 }
467         }
468
469         /*
470          * If cache version changed since the last getdents() call, then reset
471          * the cache stream.
472          */
473         if (ff->readdir.version != fi->rdc.version) {
474                 ff->readdir.pos = 0;
475                 ff->readdir.cache_off = 0;
476         }
477         /*
478          * If at the beginning of the cache, than reset version to
479          * current.
480          */
481         if (ff->readdir.pos == 0)
482                 ff->readdir.version = fi->rdc.version;
483
484         WARN_ON(fi->rdc.size < ff->readdir.cache_off);
485
486         index = ff->readdir.cache_off >> PAGE_SHIFT;
487
488         if (index == (fi->rdc.size >> PAGE_SHIFT))
489                 size = fi->rdc.size & ~PAGE_MASK;
490         else
491                 size = PAGE_SIZE;
492         spin_unlock(&fi->rdc.lock);
493
494         /* EOF? */
495         if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
496                 return 0;
497
498         page = find_get_page_flags(file->f_mapping, index,
499                                    FGP_ACCESSED | FGP_LOCK);
500         spin_lock(&fi->rdc.lock);
501         if (!page) {
502                 /*
503                  * Uh-oh: page gone missing, cache is useless
504                  */
505                 if (fi->rdc.version == ff->readdir.version)
506                         fuse_rdc_reset(inode);
507                 goto retry_locked;
508         }
509
510         /* Make sure it's still the same version after getting the page. */
511         if (ff->readdir.version != fi->rdc.version) {
512                 spin_unlock(&fi->rdc.lock);
513                 unlock_page(page);
514                 put_page(page);
515                 goto retry;
516         }
517         spin_unlock(&fi->rdc.lock);
518
519         /*
520          * Contents of the page are now protected against changing by holding
521          * the page lock.
522          */
523         addr = kmap(page);
524         res = fuse_parse_cache(ff, addr, size, ctx);
525         kunmap(page);
526         unlock_page(page);
527         put_page(page);
528
529         if (res == FOUND_ERR)
530                 return -EIO;
531
532         if (res == FOUND_ALL)
533                 return 0;
534
535         if (size == PAGE_SIZE) {
536                 /* We hit end of page: skip to next page. */
537                 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
538                 goto retry;
539         }
540
541         /*
542          * End of cache reached.  If found position, then we are done, otherwise
543          * need to fall back to uncached, since the position we were looking for
544          * wasn't in the cache.
545          */
546         return res == FOUND_SOME ? 0 : UNCACHED;
547 }
548
549 int fuse_readdir(struct file *file, struct dir_context *ctx)
550 {
551         struct fuse_file *ff = file->private_data;
552         struct inode *inode = file_inode(file);
553         int err;
554
555         if (is_bad_inode(inode))
556                 return -EIO;
557
558         mutex_lock(&ff->readdir.lock);
559
560         err = UNCACHED;
561         if (ff->open_flags & FOPEN_CACHE_DIR)
562                 err = fuse_readdir_cached(file, ctx);
563         if (err == UNCACHED)
564                 err = fuse_readdir_uncached(file, ctx);
565
566         mutex_unlock(&ff->readdir.lock);
567
568         return err;
569 }