Merge branch 'psmouse' into next
[sfrench/cifs-2.6.git] / fs / ocfs2 / extent_map.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * extent_map.c
5  *
6  * Block/Cluster mapping functions
7  *
8  * Copyright (C) 2004 Oracle.  All rights reserved.
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public
12  * License, version 2,  as published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public
20  * License along with this program; if not, write to the
21  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22  * Boston, MA 021110-1307, USA.
23  */
24
25 #include <linux/fs.h>
26 #include <linux/init.h>
27 #include <linux/slab.h>
28 #include <linux/types.h>
29 #include <linux/fiemap.h>
30
31 #include <cluster/masklog.h>
32
33 #include "ocfs2.h"
34
35 #include "alloc.h"
36 #include "dlmglue.h"
37 #include "extent_map.h"
38 #include "inode.h"
39 #include "super.h"
40 #include "symlink.h"
41 #include "aops.h"
42 #include "ocfs2_trace.h"
43
44 #include "buffer_head_io.h"
45
46 /*
47  * The extent caching implementation is intentionally trivial.
48  *
49  * We only cache a small number of extents stored directly on the
50  * inode, so linear order operations are acceptable. If we ever want
51  * to increase the size of the extent map, then these algorithms must
52  * get smarter.
53  */
54
55 void ocfs2_extent_map_init(struct inode *inode)
56 {
57         struct ocfs2_inode_info *oi = OCFS2_I(inode);
58
59         oi->ip_extent_map.em_num_items = 0;
60         INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
61 }
62
63 static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
64                                       unsigned int cpos,
65                                       struct ocfs2_extent_map_item **ret_emi)
66 {
67         unsigned int range;
68         struct ocfs2_extent_map_item *emi;
69
70         *ret_emi = NULL;
71
72         list_for_each_entry(emi, &em->em_list, ei_list) {
73                 range = emi->ei_cpos + emi->ei_clusters;
74
75                 if (cpos >= emi->ei_cpos && cpos < range) {
76                         list_move(&emi->ei_list, &em->em_list);
77
78                         *ret_emi = emi;
79                         break;
80                 }
81         }
82 }
83
84 static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
85                                    unsigned int *phys, unsigned int *len,
86                                    unsigned int *flags)
87 {
88         unsigned int coff;
89         struct ocfs2_inode_info *oi = OCFS2_I(inode);
90         struct ocfs2_extent_map_item *emi;
91
92         spin_lock(&oi->ip_lock);
93
94         __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
95         if (emi) {
96                 coff = cpos - emi->ei_cpos;
97                 *phys = emi->ei_phys + coff;
98                 if (len)
99                         *len = emi->ei_clusters - coff;
100                 if (flags)
101                         *flags = emi->ei_flags;
102         }
103
104         spin_unlock(&oi->ip_lock);
105
106         if (emi == NULL)
107                 return -ENOENT;
108
109         return 0;
110 }
111
112 /*
113  * Forget about all clusters equal to or greater than cpos.
114  */
115 void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
116 {
117         struct ocfs2_extent_map_item *emi, *n;
118         struct ocfs2_inode_info *oi = OCFS2_I(inode);
119         struct ocfs2_extent_map *em = &oi->ip_extent_map;
120         LIST_HEAD(tmp_list);
121         unsigned int range;
122
123         spin_lock(&oi->ip_lock);
124         list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
125                 if (emi->ei_cpos >= cpos) {
126                         /* Full truncate of this record. */
127                         list_move(&emi->ei_list, &tmp_list);
128                         BUG_ON(em->em_num_items == 0);
129                         em->em_num_items--;
130                         continue;
131                 }
132
133                 range = emi->ei_cpos + emi->ei_clusters;
134                 if (range > cpos) {
135                         /* Partial truncate */
136                         emi->ei_clusters = cpos - emi->ei_cpos;
137                 }
138         }
139         spin_unlock(&oi->ip_lock);
140
141         list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
142                 list_del(&emi->ei_list);
143                 kfree(emi);
144         }
145 }
146
147 /*
148  * Is any part of emi2 contained within emi1
149  */
150 static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
151                                  struct ocfs2_extent_map_item *emi2)
152 {
153         unsigned int range1, range2;
154
155         /*
156          * Check if logical start of emi2 is inside emi1
157          */
158         range1 = emi1->ei_cpos + emi1->ei_clusters;
159         if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
160                 return 1;
161
162         /*
163          * Check if logical end of emi2 is inside emi1
164          */
165         range2 = emi2->ei_cpos + emi2->ei_clusters;
166         if (range2 > emi1->ei_cpos && range2 <= range1)
167                 return 1;
168
169         return 0;
170 }
171
172 static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
173                                   struct ocfs2_extent_map_item *src)
174 {
175         dest->ei_cpos = src->ei_cpos;
176         dest->ei_phys = src->ei_phys;
177         dest->ei_clusters = src->ei_clusters;
178         dest->ei_flags = src->ei_flags;
179 }
180
181 /*
182  * Try to merge emi with ins. Returns 1 if merge succeeds, zero
183  * otherwise.
184  */
185 static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
186                                          struct ocfs2_extent_map_item *ins)
187 {
188         /*
189          * Handle contiguousness
190          */
191         if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
192             ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
193             ins->ei_flags == emi->ei_flags) {
194                 emi->ei_clusters += ins->ei_clusters;
195                 return 1;
196         } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
197                    (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
198                    ins->ei_flags == emi->ei_flags) {
199                 emi->ei_phys = ins->ei_phys;
200                 emi->ei_cpos = ins->ei_cpos;
201                 emi->ei_clusters += ins->ei_clusters;
202                 return 1;
203         }
204
205         /*
206          * Overlapping extents - this shouldn't happen unless we've
207          * split an extent to change it's flags. That is exceedingly
208          * rare, so there's no sense in trying to optimize it yet.
209          */
210         if (ocfs2_ei_is_contained(emi, ins) ||
211             ocfs2_ei_is_contained(ins, emi)) {
212                 ocfs2_copy_emi_fields(emi, ins);
213                 return 1;
214         }
215
216         /* No merge was possible. */
217         return 0;
218 }
219
220 /*
221  * In order to reduce complexity on the caller, this insert function
222  * is intentionally liberal in what it will accept.
223  *
224  * The only rule is that the truncate call *must* be used whenever
225  * records have been deleted. This avoids inserting overlapping
226  * records with different physical mappings.
227  */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	/* Translate the little-endian on-disk record into a map item. */
	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	/* First choice: merge into an existing item, bumping it to the
	 * head of the MRU list. */
	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the last recently
	 * inserted.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			/*
			 * Drop the spinlock to allocate, then retry the
			 * whole search - the list may have changed while
			 * the lock was released.
			 */
			spin_unlock(&oi->ip_lock);

			/* GFP_NOFS: do not recurse into the fs from reclaim. */
			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				/* Caching is best-effort; silently skip. */
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		/* Map is full - recycle the tail (least recently used). */
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	/* kfree(NULL) is a no-op; new_emi is non-NULL only if unused. */
	kfree(new_emi);
}
288
289 static int ocfs2_last_eb_is_empty(struct inode *inode,
290                                   struct ocfs2_dinode *di)
291 {
292         int ret, next_free;
293         u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
294         struct buffer_head *eb_bh = NULL;
295         struct ocfs2_extent_block *eb;
296         struct ocfs2_extent_list *el;
297
298         ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
299         if (ret) {
300                 mlog_errno(ret);
301                 goto out;
302         }
303
304         eb = (struct ocfs2_extent_block *) eb_bh->b_data;
305         el = &eb->h_list;
306
307         if (el->l_tree_depth) {
308                 ocfs2_error(inode->i_sb,
309                             "Inode %lu has non zero tree depth in leaf block %llu\n",
310                             inode->i_ino,
311                             (unsigned long long)eb_bh->b_blocknr);
312                 ret = -EROFS;
313                 goto out;
314         }
315
316         next_free = le16_to_cpu(el->l_next_free_rec);
317
318         if (next_free == 0 ||
319             (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
320                 ret = 1;
321
322 out:
323         brelse(eb_bh);
324         return ret;
325 }
326
327 /*
328  * Return the 1st index within el which contains an extent start
329  * larger than v_cluster.
330  */
331 static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
332                                        u32 v_cluster)
333 {
334         int i;
335         struct ocfs2_extent_rec *rec;
336
337         for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
338                 rec = &el->l_recs[i];
339
340                 if (v_cluster < le32_to_cpu(rec->e_cpos))
341                         break;
342         }
343
344         return i;
345 }
346
/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * cluster size minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	/* Index of the first record starting past v_cluster, if any. */
	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		/* h_next_leaf_blk == 0 means this is the right-most leaf. */
		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Redo the search against the next leaf's record list. */
		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		/* The hole ends where the next record begins. */
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}
410
/*
 * Walk the inode's allocation btree (if any) down to the leaf covering
 * v_cluster and copy the raw extent record into *ret_rec. A hole is
 * reported as a zeroed record, with the hole's length stored through
 * hole_len when the caller asked for it. *is_last (optional) is set
 * to 1 when the record is the file's last extent.
 */
static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *uninitialized_var(eb);
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	/* A zeroed record doubles as the "hole" return value. */
	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		/* Extents are not in-inode - descend to the proper leaf. */
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		/* ocfs2_find_leaf() must return a depth-zero list. */
		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			/*
			 * NOTE(review): len is declared int while
			 * ocfs2_figure_hole_clusters() takes a u32 * -
			 * confirm the signedness mismatch is benign.
			 */
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	/* A record mapping cluster 0 (the superblock) is corruption. */
	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}
529
530 static void ocfs2_relative_extent_offsets(struct super_block *sb,
531                                           u32 v_cluster,
532                                           struct ocfs2_extent_rec *rec,
533                                           u32 *p_cluster, u32 *num_clusters)
534
535 {
536         u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
537
538         *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
539         *p_cluster = *p_cluster + coff;
540
541         if (num_clusters)
542                 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
543 }
544
545 int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
546                              u32 *p_cluster, u32 *num_clusters,
547                              struct ocfs2_extent_list *el,
548                              unsigned int *extent_flags)
549 {
550         int ret = 0, i;
551         struct buffer_head *eb_bh = NULL;
552         struct ocfs2_extent_block *eb;
553         struct ocfs2_extent_rec *rec;
554         u32 coff;
555
556         if (el->l_tree_depth) {
557                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
558                                       &eb_bh);
559                 if (ret) {
560                         mlog_errno(ret);
561                         goto out;
562                 }
563
564                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
565                 el = &eb->h_list;
566
567                 if (el->l_tree_depth) {
568                         ocfs2_error(inode->i_sb,
569                                     "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
570                                     inode->i_ino,
571                                     (unsigned long long)eb_bh->b_blocknr);
572                         ret = -EROFS;
573                         goto out;
574                 }
575         }
576
577         i = ocfs2_search_extent_list(el, v_cluster);
578         if (i == -1) {
579                 ret = -EROFS;
580                 mlog_errno(ret);
581                 goto out;
582         } else {
583                 rec = &el->l_recs[i];
584                 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
585
586                 if (!rec->e_blkno) {
587                         ocfs2_error(inode->i_sb,
588                                     "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
589                                     inode->i_ino,
590                                     le32_to_cpu(rec->e_cpos),
591                                     ocfs2_rec_clusters(el, rec));
592                         ret = -EROFS;
593                         goto out;
594                 }
595                 coff = v_cluster - le32_to_cpu(rec->e_cpos);
596                 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
597                                                     le64_to_cpu(rec->e_blkno));
598                 *p_cluster = *p_cluster + coff;
599                 if (num_clusters)
600                         *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
601
602                 if (extent_flags)
603                         *extent_flags = rec->e_flags;
604         }
605 out:
606         if (eb_bh)
607                 brelse(eb_bh);
608         return ret;
609 }
610
/*
 * Map v_cluster to its physical cluster, consulting the in-memory
 * extent map first and falling back to the on-disk allocation tree.
 *
 * On success, *p_cluster is 0 for a hole (with *num_clusters set to
 * the hole's size when requested), otherwise the physical cluster.
 * Inline-data inodes have no cluster mapping and yield -ERANGE.
 */
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int uninitialized_var(hole_len), flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	/* Inline data lives inside the inode block - nothing to map. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	/* Fast path: answer from the cached extent map when possible. */
	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		/* Cache the freshly-read record for later lookups. */
		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}
669
670 /*
671  * This expects alloc_sem to be held. The allocation cannot change at
672  * all while the map is in the process of being updated.
673  */
674 int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
675                                 u64 *ret_count, unsigned int *extent_flags)
676 {
677         int ret;
678         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
679         u32 cpos, num_clusters, p_cluster;
680         u64 boff = 0;
681
682         cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
683
684         ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
685                                  extent_flags);
686         if (ret) {
687                 mlog_errno(ret);
688                 goto out;
689         }
690
691         /*
692          * p_cluster == 0 indicates a hole.
693          */
694         if (p_cluster) {
695                 boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
696                 boff += (v_blkno & (u64)(bpc - 1));
697         }
698
699         *p_blkno = boff;
700
701         if (ret_count) {
702                 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
703                 *ret_count -= v_blkno & (u64)(bpc - 1);
704         }
705
706 out:
707         return ret;
708 }
709
710 /*
711  * The ocfs2_fiemap_inline() may be a little bit misleading, since
712  * it not only handles the fiemap for inlined files, but also deals
713  * with the fast symlink, cause they have no difference for extent
714  * mapping per se.
715  */
716 static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
717                                struct fiemap_extent_info *fieinfo,
718                                u64 map_start)
719 {
720         int ret;
721         unsigned int id_count;
722         struct ocfs2_dinode *di;
723         u64 phys;
724         u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
725         struct ocfs2_inode_info *oi = OCFS2_I(inode);
726
727         di = (struct ocfs2_dinode *)di_bh->b_data;
728         if (ocfs2_inode_is_fast_symlink(inode))
729                 id_count = ocfs2_fast_symlink_chars(inode->i_sb);
730         else
731                 id_count = le16_to_cpu(di->id2.i_data.id_count);
732
733         if (map_start < id_count) {
734                 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
735                 if (ocfs2_inode_is_fast_symlink(inode))
736                         phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
737                 else
738                         phys += offsetof(struct ocfs2_dinode,
739                                          id2.i_data.id_data);
740
741                 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
742                                               flags);
743                 if (ret < 0)
744                         return ret;
745         }
746
747         return 0;
748 }
749
750 #define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
751
/*
 * Fill fieinfo with the extents backing [map_start, map_start+map_len).
 * Runs under a shared inode cluster lock and ip_alloc_sem; inline data
 * and fast symlinks are reported as a single inline extent.
 */
int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	/* Only FIEMAP_FLAG_SYNC is supported (OCFS2_FIEMAP_FLAGS). */
	ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	/* Walk record by record until the range or the file ends. */
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		/* Holes are skipped - fiemap reports only extents. */
		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		/* Translate ocfs2 extent flags into fiemap flags. */
		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
	}

	/* fiemap_fill_next_extent() returns 1 when fieinfo is full. */
	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:

	return ret;
}
835
836 /* Is IO overwriting allocated blocks? */
837 int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
838                        u64 map_start, u64 map_len)
839 {
840         int ret = 0, is_last;
841         u32 mapping_end, cpos;
842         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
843         struct ocfs2_extent_rec rec;
844
845         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
846                 if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
847                         return ret;
848                 else
849                         return -EAGAIN;
850         }
851
852         cpos = map_start >> osb->s_clustersize_bits;
853         mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
854                                                map_start + map_len);
855         is_last = 0;
856         while (cpos < mapping_end && !is_last) {
857                 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
858                                                  NULL, &rec, &is_last);
859                 if (ret) {
860                         mlog_errno(ret);
861                         goto out;
862                 }
863
864                 if (rec.e_blkno == 0ULL)
865                         break;
866
867                 if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
868                         break;
869
870                 cpos = le32_to_cpu(rec.e_cpos) +
871                         le16_to_cpu(rec.e_leaf_clusters);
872         }
873
874         if (cpos < mapping_end)
875                 ret = -EAGAIN;
876 out:
877         return ret;
878 }
879
/*
 * Implement lseek(2) SEEK_DATA / SEEK_HOLE for ocfs2.
 *
 * On success, *offset is advanced to the start of the next region of the
 * requested kind (data or hole) at or after the incoming *offset, and 0
 * is returned.  Returns -ENXIO when no such region exists before EOF,
 * or a negative error code on lock/lookup failure.
 */
int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Hold ip_alloc_sem so the extent list is stable while we walk it. */
	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/* Seeking at or past EOF always fails, per lseek(2) semantics. */
	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	/*
	 * Inline data is all data with no holes; the only hole is the
	 * implicit one at EOF.  SEEK_DATA keeps the incoming offset.
	 */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	/* Walk extents (and holes) cluster by cluster until EOF. */
	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		/* Byte offset of the current position. */
		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			/* No extent record here: a hole of hole_size clusters. */
			clen = hole_size;
			is_data = 0;
		} else {
			/* Remaining clusters of this extent from cpos onward;
			 * unwritten extents read as zeros, so treat as hole. */
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
		}

		/* Found a region of the kind the caller asked for. */
		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			/* Don't move backward if *offset is inside the region. */
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	/*
	 * Ran off the end of the extent list without a match.  For
	 * SEEK_HOLE the implicit hole at EOF qualifies: report the end of
	 * the last region walked, clamped to i_size.  (cpos/clen still
	 * describe that last region.)
	 */
	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<=  cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	/* SEEK_DATA with no data before EOF. */
	ret = -ENXIO;

out_unlock:

	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}
973
974 int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
975                            struct buffer_head *bhs[], int flags,
976                            int (*validate)(struct super_block *sb,
977                                            struct buffer_head *bh))
978 {
979         int rc = 0;
980         u64 p_block, p_count;
981         int i, count, done = 0;
982
983         trace_ocfs2_read_virt_blocks(
984              inode, (unsigned long long)v_block, nr, bhs, flags,
985              validate);
986
987         if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
988             i_size_read(inode)) {
989                 BUG_ON(!(flags & OCFS2_BH_READAHEAD));
990                 goto out;
991         }
992
993         while (done < nr) {
994                 down_read(&OCFS2_I(inode)->ip_alloc_sem);
995                 rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
996                                                  &p_block, &p_count, NULL);
997                 up_read(&OCFS2_I(inode)->ip_alloc_sem);
998                 if (rc) {
999                         mlog_errno(rc);
1000                         break;
1001                 }
1002
1003                 if (!p_block) {
1004                         rc = -EIO;
1005                         mlog(ML_ERROR,
1006                              "Inode #%llu contains a hole at offset %llu\n",
1007                              (unsigned long long)OCFS2_I(inode)->ip_blkno,
1008                              (unsigned long long)(v_block + done) <<
1009                              inode->i_sb->s_blocksize_bits);
1010                         break;
1011                 }
1012
1013                 count = nr - done;
1014                 if (p_count < count)
1015                         count = p_count;
1016
1017                 /*
1018                  * If the caller passed us bhs, they should have come
1019                  * from a previous readahead call to this function.  Thus,
1020                  * they should have the right b_blocknr.
1021                  */
1022                 for (i = 0; i < count; i++) {
1023                         if (!bhs[done + i])
1024                                 continue;
1025                         BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
1026                 }
1027
1028                 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
1029                                        bhs + done, flags, validate);
1030                 if (rc) {
1031                         mlog_errno(rc);
1032                         break;
1033                 }
1034                 done += count;
1035         }
1036
1037 out:
1038         return rc;
1039 }
1040
1041