f0fd3adb169398ec8483876dece90fe2bc435d78
[sfrench/cifs-2.6.git] / fs / overlayfs / readdir.c
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9
10 #include <linux/fs.h>
11 #include <linux/slab.h>
12 #include <linux/namei.h>
13 #include <linux/file.h>
14 #include <linux/xattr.h>
15 #include <linux/rbtree.h>
16 #include <linux/security.h>
17 #include <linux/cred.h>
18 #include "overlayfs.h"
19
20 struct ovl_cache_entry {
21         unsigned int len;
22         unsigned int type;
23         u64 ino;
24         struct list_head l_node;
25         struct rb_node node;
26         struct ovl_cache_entry *next_maybe_whiteout;
27         bool is_whiteout;
28         char name[];
29 };
30
31 struct ovl_dir_cache {
32         long refcount;
33         u64 version;
34         struct list_head entries;
35 };
36
37 struct ovl_readdir_data {
38         struct dir_context ctx;
39         struct dentry *dentry;
40         bool is_lowest;
41         struct rb_root root;
42         struct list_head *list;
43         struct list_head middle;
44         struct ovl_cache_entry *first_maybe_whiteout;
45         int count;
46         int err;
47         bool d_type_supported;
48 };
49
50 struct ovl_dir_file {
51         bool is_real;
52         bool is_upper;
53         struct ovl_dir_cache *cache;
54         struct list_head *cursor;
55         struct file *realfile;
56         struct file *upperfile;
57 };
58
59 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
60 {
61         return container_of(n, struct ovl_cache_entry, node);
62 }
63
64 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
65                                                     const char *name, int len)
66 {
67         struct rb_node *node = root->rb_node;
68         int cmp;
69
70         while (node) {
71                 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
72
73                 cmp = strncmp(name, p->name, len);
74                 if (cmp > 0)
75                         node = p->node.rb_right;
76                 else if (cmp < 0 || len < p->len)
77                         node = p->node.rb_left;
78                 else
79                         return p;
80         }
81
82         return NULL;
83 }
84
85 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
86                                                    const char *name, int len,
87                                                    u64 ino, unsigned int d_type)
88 {
89         struct ovl_cache_entry *p;
90         size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
91
92         p = kmalloc(size, GFP_KERNEL);
93         if (!p)
94                 return NULL;
95
96         memcpy(p->name, name, len);
97         p->name[len] = '\0';
98         p->len = len;
99         p->type = d_type;
100         p->ino = ino;
101         p->is_whiteout = false;
102
103         if (d_type == DT_CHR) {
104                 p->next_maybe_whiteout = rdd->first_maybe_whiteout;
105                 rdd->first_maybe_whiteout = p;
106         }
107         return p;
108 }
109
110 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
111                                   const char *name, int len, u64 ino,
112                                   unsigned int d_type)
113 {
114         struct rb_node **newp = &rdd->root.rb_node;
115         struct rb_node *parent = NULL;
116         struct ovl_cache_entry *p;
117
118         while (*newp) {
119                 int cmp;
120                 struct ovl_cache_entry *tmp;
121
122                 parent = *newp;
123                 tmp = ovl_cache_entry_from_node(*newp);
124                 cmp = strncmp(name, tmp->name, len);
125                 if (cmp > 0)
126                         newp = &tmp->node.rb_right;
127                 else if (cmp < 0 || len < tmp->len)
128                         newp = &tmp->node.rb_left;
129                 else
130                         return 0;
131         }
132
133         p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
134         if (p == NULL)
135                 return -ENOMEM;
136
137         list_add_tail(&p->l_node, rdd->list);
138         rb_link_node(&p->node, parent, newp);
139         rb_insert_color(&p->node, &rdd->root);
140
141         return 0;
142 }
143
144 static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
145                            const char *name, int namelen,
146                            loff_t offset, u64 ino, unsigned int d_type)
147 {
148         struct ovl_cache_entry *p;
149
150         p = ovl_cache_entry_find(&rdd->root, name, namelen);
151         if (p) {
152                 list_move_tail(&p->l_node, &rdd->middle);
153         } else {
154                 p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
155                 if (p == NULL)
156                         rdd->err = -ENOMEM;
157                 else
158                         list_add_tail(&p->l_node, &rdd->middle);
159         }
160
161         return rdd->err;
162 }
163
164 void ovl_cache_free(struct list_head *list)
165 {
166         struct ovl_cache_entry *p;
167         struct ovl_cache_entry *n;
168
169         list_for_each_entry_safe(p, n, list, l_node)
170                 kfree(p);
171
172         INIT_LIST_HEAD(list);
173 }
174
175 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
176 {
177         struct ovl_dir_cache *cache = od->cache;
178
179         WARN_ON(cache->refcount <= 0);
180         cache->refcount--;
181         if (!cache->refcount) {
182                 if (ovl_dir_cache(dentry) == cache)
183                         ovl_set_dir_cache(dentry, NULL);
184
185                 ovl_cache_free(&cache->entries);
186                 kfree(cache);
187         }
188 }
189
190 static int ovl_fill_merge(struct dir_context *ctx, const char *name,
191                           int namelen, loff_t offset, u64 ino,
192                           unsigned int d_type)
193 {
194         struct ovl_readdir_data *rdd =
195                 container_of(ctx, struct ovl_readdir_data, ctx);
196
197         rdd->count++;
198         if (!rdd->is_lowest)
199                 return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
200         else
201                 return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
202 }
203
204 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
205 {
206         int err;
207         struct ovl_cache_entry *p;
208         struct dentry *dentry;
209         const struct cred *old_cred;
210
211         old_cred = ovl_override_creds(rdd->dentry->d_sb);
212
213         err = down_write_killable(&dir->d_inode->i_rwsem);
214         if (!err) {
215                 while (rdd->first_maybe_whiteout) {
216                         p = rdd->first_maybe_whiteout;
217                         rdd->first_maybe_whiteout = p->next_maybe_whiteout;
218                         dentry = lookup_one_len(p->name, dir, p->len);
219                         if (!IS_ERR(dentry)) {
220                                 p->is_whiteout = ovl_is_whiteout(dentry);
221                                 dput(dentry);
222                         }
223                 }
224                 inode_unlock(dir->d_inode);
225         }
226         revert_creds(old_cred);
227
228         return err;
229 }
230
231 static inline int ovl_dir_read(struct path *realpath,
232                                struct ovl_readdir_data *rdd)
233 {
234         struct file *realfile;
235         int err;
236
237         realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
238         if (IS_ERR(realfile))
239                 return PTR_ERR(realfile);
240
241         rdd->first_maybe_whiteout = NULL;
242         rdd->ctx.pos = 0;
243         do {
244                 rdd->count = 0;
245                 rdd->err = 0;
246                 err = iterate_dir(realfile, &rdd->ctx);
247                 if (err >= 0)
248                         err = rdd->err;
249         } while (!err && rdd->count);
250
251         if (!err && rdd->first_maybe_whiteout && rdd->dentry)
252                 err = ovl_check_whiteouts(realpath->dentry, rdd);
253
254         fput(realfile);
255
256         return err;
257 }
258
259 static void ovl_dir_reset(struct file *file)
260 {
261         struct ovl_dir_file *od = file->private_data;
262         struct ovl_dir_cache *cache = od->cache;
263         struct dentry *dentry = file->f_path.dentry;
264         enum ovl_path_type type = ovl_path_type(dentry);
265
266         if (cache && ovl_dentry_version_get(dentry) != cache->version) {
267                 ovl_cache_put(od, dentry);
268                 od->cache = NULL;
269                 od->cursor = NULL;
270         }
271         WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
272         if (od->is_real && OVL_TYPE_MERGE(type))
273                 od->is_real = false;
274 }
275
276 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
277 {
278         int err;
279         struct path realpath;
280         struct ovl_readdir_data rdd = {
281                 .ctx.actor = ovl_fill_merge,
282                 .dentry = dentry,
283                 .list = list,
284                 .root = RB_ROOT,
285                 .is_lowest = false,
286         };
287         int idx, next;
288
289         for (idx = 0; idx != -1; idx = next) {
290                 next = ovl_path_next(idx, dentry, &realpath);
291
292                 if (next != -1) {
293                         err = ovl_dir_read(&realpath, &rdd);
294                         if (err)
295                                 break;
296                 } else {
297                         /*
298                          * Insert lowest layer entries before upper ones, this
299                          * allows offsets to be reasonably constant
300                          */
301                         list_add(&rdd.middle, rdd.list);
302                         rdd.is_lowest = true;
303                         err = ovl_dir_read(&realpath, &rdd);
304                         list_del(&rdd.middle);
305                 }
306         }
307         return err;
308 }
309
310 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
311 {
312         struct list_head *p;
313         loff_t off = 0;
314
315         list_for_each(p, &od->cache->entries) {
316                 if (off >= pos)
317                         break;
318                 off++;
319         }
320         /* Cursor is safe since the cache is stable */
321         od->cursor = p;
322 }
323
324 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
325 {
326         int res;
327         struct ovl_dir_cache *cache;
328
329         cache = ovl_dir_cache(dentry);
330         if (cache && ovl_dentry_version_get(dentry) == cache->version) {
331                 cache->refcount++;
332                 return cache;
333         }
334         ovl_set_dir_cache(dentry, NULL);
335
336         cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
337         if (!cache)
338                 return ERR_PTR(-ENOMEM);
339
340         cache->refcount = 1;
341         INIT_LIST_HEAD(&cache->entries);
342
343         res = ovl_dir_read_merged(dentry, &cache->entries);
344         if (res) {
345                 ovl_cache_free(&cache->entries);
346                 kfree(cache);
347                 return ERR_PTR(res);
348         }
349
350         cache->version = ovl_dentry_version_get(dentry);
351         ovl_set_dir_cache(dentry, cache);
352
353         return cache;
354 }
355
356 static int ovl_iterate(struct file *file, struct dir_context *ctx)
357 {
358         struct ovl_dir_file *od = file->private_data;
359         struct dentry *dentry = file->f_path.dentry;
360         struct ovl_cache_entry *p;
361
362         if (!ctx->pos)
363                 ovl_dir_reset(file);
364
365         if (od->is_real)
366                 return iterate_dir(od->realfile, ctx);
367
368         if (!od->cache) {
369                 struct ovl_dir_cache *cache;
370
371                 cache = ovl_cache_get(dentry);
372                 if (IS_ERR(cache))
373                         return PTR_ERR(cache);
374
375                 od->cache = cache;
376                 ovl_seek_cursor(od, ctx->pos);
377         }
378
379         while (od->cursor != &od->cache->entries) {
380                 p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
381                 if (!p->is_whiteout)
382                         if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
383                                 break;
384                 od->cursor = p->l_node.next;
385                 ctx->pos++;
386         }
387         return 0;
388 }
389
390 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
391 {
392         loff_t res;
393         struct ovl_dir_file *od = file->private_data;
394
395         inode_lock(file_inode(file));
396         if (!file->f_pos)
397                 ovl_dir_reset(file);
398
399         if (od->is_real) {
400                 res = vfs_llseek(od->realfile, offset, origin);
401                 file->f_pos = od->realfile->f_pos;
402         } else {
403                 res = -EINVAL;
404
405                 switch (origin) {
406                 case SEEK_CUR:
407                         offset += file->f_pos;
408                         break;
409                 case SEEK_SET:
410                         break;
411                 default:
412                         goto out_unlock;
413                 }
414                 if (offset < 0)
415                         goto out_unlock;
416
417                 if (offset != file->f_pos) {
418                         file->f_pos = offset;
419                         if (od->cache)
420                                 ovl_seek_cursor(od, offset);
421                 }
422                 res = offset;
423         }
424 out_unlock:
425         inode_unlock(file_inode(file));
426
427         return res;
428 }
429
430 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
431                          int datasync)
432 {
433         struct ovl_dir_file *od = file->private_data;
434         struct dentry *dentry = file->f_path.dentry;
435         struct file *realfile = od->realfile;
436
437         /*
438          * Need to check if we started out being a lower dir, but got copied up
439          */
440         if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
441                 struct inode *inode = file_inode(file);
442
443                 realfile = lockless_dereference(od->upperfile);
444                 if (!realfile) {
445                         struct path upperpath;
446
447                         ovl_path_upper(dentry, &upperpath);
448                         realfile = ovl_path_open(&upperpath, O_RDONLY);
449
450                         inode_lock(inode);
451                         if (!od->upperfile) {
452                                 if (IS_ERR(realfile)) {
453                                         inode_unlock(inode);
454                                         return PTR_ERR(realfile);
455                                 }
456                                 smp_store_release(&od->upperfile, realfile);
457                         } else {
458                                 /* somebody has beaten us to it */
459                                 if (!IS_ERR(realfile))
460                                         fput(realfile);
461                                 realfile = od->upperfile;
462                         }
463                         inode_unlock(inode);
464                 }
465         }
466
467         return vfs_fsync_range(realfile, start, end, datasync);
468 }
469
470 static int ovl_dir_release(struct inode *inode, struct file *file)
471 {
472         struct ovl_dir_file *od = file->private_data;
473
474         if (od->cache) {
475                 inode_lock(inode);
476                 ovl_cache_put(od, file->f_path.dentry);
477                 inode_unlock(inode);
478         }
479         fput(od->realfile);
480         if (od->upperfile)
481                 fput(od->upperfile);
482         kfree(od);
483
484         return 0;
485 }
486
487 static int ovl_dir_open(struct inode *inode, struct file *file)
488 {
489         struct path realpath;
490         struct file *realfile;
491         struct ovl_dir_file *od;
492         enum ovl_path_type type;
493
494         od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
495         if (!od)
496                 return -ENOMEM;
497
498         type = ovl_path_real(file->f_path.dentry, &realpath);
499         realfile = ovl_path_open(&realpath, file->f_flags);
500         if (IS_ERR(realfile)) {
501                 kfree(od);
502                 return PTR_ERR(realfile);
503         }
504         od->realfile = realfile;
505         od->is_real = !OVL_TYPE_MERGE(type);
506         od->is_upper = OVL_TYPE_UPPER(type);
507         file->private_data = od;
508
509         return 0;
510 }
511
512 const struct file_operations ovl_dir_operations = {
513         .read           = generic_read_dir,
514         .open           = ovl_dir_open,
515         .iterate        = ovl_iterate,
516         .llseek         = ovl_dir_llseek,
517         .fsync          = ovl_dir_fsync,
518         .release        = ovl_dir_release,
519 };
520
521 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
522 {
523         int err;
524         struct ovl_cache_entry *p;
525
526         err = ovl_dir_read_merged(dentry, list);
527         if (err)
528                 return err;
529
530         err = 0;
531
532         list_for_each_entry(p, list, l_node) {
533                 if (p->is_whiteout)
534                         continue;
535
536                 if (p->name[0] == '.') {
537                         if (p->len == 1)
538                                 continue;
539                         if (p->len == 2 && p->name[1] == '.')
540                                 continue;
541                 }
542                 err = -ENOTEMPTY;
543                 break;
544         }
545
546         return err;
547 }
548
549 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
550 {
551         struct ovl_cache_entry *p;
552
553         inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
554         list_for_each_entry(p, list, l_node) {
555                 struct dentry *dentry;
556
557                 if (!p->is_whiteout)
558                         continue;
559
560                 dentry = lookup_one_len(p->name, upper, p->len);
561                 if (IS_ERR(dentry)) {
562                         pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
563                                upper->d_name.name, p->len, p->name,
564                                (int) PTR_ERR(dentry));
565                         continue;
566                 }
567                 if (dentry->d_inode)
568                         ovl_cleanup(upper->d_inode, dentry);
569                 dput(dentry);
570         }
571         inode_unlock(upper->d_inode);
572 }
573
574 static int ovl_check_d_type(struct dir_context *ctx, const char *name,
575                           int namelen, loff_t offset, u64 ino,
576                           unsigned int d_type)
577 {
578         struct ovl_readdir_data *rdd =
579                 container_of(ctx, struct ovl_readdir_data, ctx);
580
581         /* Even if d_type is not supported, DT_DIR is returned for . and .. */
582         if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
583                 return 0;
584
585         if (d_type != DT_UNKNOWN)
586                 rdd->d_type_supported = true;
587
588         return 0;
589 }
590
591 /*
592  * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
593  * if error is encountered.
594  */
595 int ovl_check_d_type_supported(struct path *realpath)
596 {
597         int err;
598         struct ovl_readdir_data rdd = {
599                 .ctx.actor = ovl_check_d_type,
600                 .d_type_supported = false,
601         };
602
603         err = ovl_dir_read(realpath, &rdd);
604         if (err)
605                 return err;
606
607         return rdd.d_type_supported;
608 }
609
610 static void ovl_workdir_cleanup_recurse(struct path *path, int level)
611 {
612         int err;
613         struct inode *dir = path->dentry->d_inode;
614         LIST_HEAD(list);
615         struct ovl_cache_entry *p;
616         struct ovl_readdir_data rdd = {
617                 .ctx.actor = ovl_fill_merge,
618                 .dentry = NULL,
619                 .list = &list,
620                 .root = RB_ROOT,
621                 .is_lowest = false,
622         };
623
624         err = ovl_dir_read(path, &rdd);
625         if (err)
626                 goto out;
627
628         inode_lock_nested(dir, I_MUTEX_PARENT);
629         list_for_each_entry(p, &list, l_node) {
630                 struct dentry *dentry;
631
632                 if (p->name[0] == '.') {
633                         if (p->len == 1)
634                                 continue;
635                         if (p->len == 2 && p->name[1] == '.')
636                                 continue;
637                 }
638                 dentry = lookup_one_len(p->name, path->dentry, p->len);
639                 if (IS_ERR(dentry))
640                         continue;
641                 if (dentry->d_inode)
642                         ovl_workdir_cleanup(dir, path->mnt, dentry, level);
643                 dput(dentry);
644         }
645         inode_unlock(dir);
646 out:
647         ovl_cache_free(&list);
648 }
649
650 void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
651                          struct dentry *dentry, int level)
652 {
653         int err;
654
655         if (!d_is_dir(dentry) || level > 1) {
656                 ovl_cleanup(dir, dentry);
657                 return;
658         }
659
660         err = ovl_do_rmdir(dir, dentry);
661         if (err) {
662                 struct path path = { .mnt = mnt, .dentry = dentry };
663
664                 inode_unlock(dir);
665                 ovl_workdir_cleanup_recurse(&path, level + 1);
666                 inode_lock_nested(dir, I_MUTEX_PARENT);
667                 ovl_cleanup(dir, dentry);
668         }
669 }
670
671 int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
672                          struct path *lowerstack, unsigned int numlower)
673 {
674         int err;
675         struct inode *dir = dentry->d_inode;
676         struct path path = { .mnt = mnt, .dentry = dentry };
677         LIST_HEAD(list);
678         struct ovl_cache_entry *p;
679         struct ovl_readdir_data rdd = {
680                 .ctx.actor = ovl_fill_merge,
681                 .dentry = NULL,
682                 .list = &list,
683                 .root = RB_ROOT,
684                 .is_lowest = false,
685         };
686
687         err = ovl_dir_read(&path, &rdd);
688         if (err)
689                 goto out;
690
691         inode_lock_nested(dir, I_MUTEX_PARENT);
692         list_for_each_entry(p, &list, l_node) {
693                 struct dentry *index;
694
695                 if (p->name[0] == '.') {
696                         if (p->len == 1)
697                                 continue;
698                         if (p->len == 2 && p->name[1] == '.')
699                                 continue;
700                 }
701                 index = lookup_one_len(p->name, dentry, p->len);
702                 if (IS_ERR(index)) {
703                         err = PTR_ERR(index);
704                         break;
705                 }
706                 err = ovl_verify_index(index, lowerstack, numlower);
707                 if (err) {
708                         if (err == -EROFS)
709                                 break;
710                         err = ovl_cleanup(dir, index);
711                         if (err)
712                                 break;
713                 }
714                 dput(index);
715         }
716         inode_unlock(dir);
717 out:
718         ovl_cache_free(&list);
719         if (err)
720                 pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
721         return err;
722 }