treewide: Use array_size() in vmalloc()
[sfrench/cifs-2.6.git] / fs / reiserfs / bitmap.c
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 /* Reiserfs block (de)allocator, bitmap-based. */
5
6 #include <linux/time.h>
7 #include "reiserfs.h"
8 #include <linux/errno.h>
9 #include <linux/buffer_head.h>
10 #include <linux/kernel.h>
11 #include <linux/pagemap.h>
12 #include <linux/vmalloc.h>
13 #include <linux/quotaops.h>
14 #include <linux/seq_file.h>
15
16 #define PREALLOCATION_SIZE 9
17
18 /* different reiserfs block allocator options */
19
20 #define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
21
22 #define  _ALLOC_concentrating_formatted_nodes 0
23 #define  _ALLOC_displacing_large_files 1
24 #define  _ALLOC_displacing_new_packing_localities 2
25 #define  _ALLOC_old_hashed_relocation 3
26 #define  _ALLOC_new_hashed_relocation 4
27 #define  _ALLOC_skip_busy 5
28 #define  _ALLOC_displace_based_on_dirid 6
29 #define  _ALLOC_hashed_formatted_nodes 7
30 #define  _ALLOC_old_way 8
31 #define  _ALLOC_hundredth_slices 9
32 #define  _ALLOC_dirid_groups 10
33 #define  _ALLOC_oid_groups 11
34 #define  _ALLOC_packing_groups 12
35
36 #define  concentrating_formatted_nodes(s)       test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
37 #define  displacing_large_files(s)              test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
38 #define  displacing_new_packing_localities(s)   test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
39
40 #define SET_OPTION(optname) \
41    do { \
42         reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
43         set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
44     } while(0)
45 #define TEST_OPTION(optname, s) \
46     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
47
48 static inline void get_bit_address(struct super_block *s,
49                                    b_blocknr_t block,
50                                    unsigned int *bmap_nr,
51                                    unsigned int *offset)
52 {
53         /*
54          * It is in the bitmap block number equal to the block
55          * number divided by the number of bits in a block.
56          */
57         *bmap_nr = block >> (s->s_blocksize_bits + 3);
58         /* Within that bitmap block it is located at bit offset *offset. */
59         *offset = block & ((s->s_blocksize << 3) - 1);
60 }
61
62 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
63 {
64         unsigned int bmap, offset;
65         unsigned int bmap_count = reiserfs_bmap_count(s);
66
67         if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
68                 reiserfs_error(s, "vs-4010",
69                                "block number is out of range %lu (%u)",
70                                block, SB_BLOCK_COUNT(s));
71                 return 0;
72         }
73
74         get_bit_address(s, block, &bmap, &offset);
75
76         /*
77          * Old format filesystem? Unlikely, but the bitmaps are all
78          * up front so we need to account for it.
79          */
80         if (unlikely(test_bit(REISERFS_OLD_FORMAT,
81                               &REISERFS_SB(s)->s_properties))) {
82                 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
83                 if (block >= bmap1 &&
84                     block <= bmap1 + bmap_count) {
85                         reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
86                                        "can't be freed or reused",
87                                        block, bmap_count);
88                         return 0;
89                 }
90         } else {
91                 if (offset == 0) {
92                         reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
93                                        "can't be freed or reused",
94                                        block, bmap_count);
95                         return 0;
96                 }
97         }
98
99         if (bmap >= bmap_count) {
100                 reiserfs_error(s, "vs-4030", "bitmap for requested block "
101                                "is out of range: block=%lu, bitmap_nr=%u",
102                                block, bmap);
103                 return 0;
104         }
105
106         if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
107                 reiserfs_error(s, "vs-4050", "this is root block (%u), "
108                                "it must be busy", SB_ROOT_BLOCK(s));
109                 return 0;
110         }
111
112         return 1;
113 }
114
115 /*
116  * Searches in journal structures for a given block number (bmap, off).
117  * If block is found in reiserfs journal it suggests next free block
118  * candidate to test.
119  */
120 static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
121                                       int off, int *next)
122 {
123         b_blocknr_t tmp;
124
125         if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
126                 if (tmp) {      /* hint supplied */
127                         *next = tmp;
128                         PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
129                 } else {
130                         (*next) = off + 1;  /* inc offset to avoid looping. */
131                         PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
132                 }
133                 PROC_INFO_INC(s, scan_bitmap.retry);
134                 return 1;
135         }
136         return 0;
137 }
138
139 /*
140  * Searches for a window of zero bits with given minimum and maximum
141  * lengths in one bitmap block
142  */
143 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
144                              unsigned int bmap_n, int *beg, int boundary,
145                              int min, int max, int unfm)
146 {
147         struct super_block *s = th->t_super;
148         struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
149         struct buffer_head *bh;
150         int end, next;
151         int org = *beg;
152
153         BUG_ON(!th->t_trans_id);
154         RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
155                "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
156         PROC_INFO_INC(s, scan_bitmap.bmap);
157
158         if (!bi) {
159                 reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
160                                "for bitmap %d", bmap_n);
161                 return 0;
162         }
163
164         bh = reiserfs_read_bitmap_block(s, bmap_n);
165         if (bh == NULL)
166                 return 0;
167
168         while (1) {
169 cont:
170                 if (bi->free_count < min) {
171                         brelse(bh);
172                         return 0;       /* No free blocks in this bitmap */
173                 }
174
175                 /* search for a first zero bit -- beginning of a window */
176                 *beg = reiserfs_find_next_zero_le_bit
177                     ((unsigned long *)(bh->b_data), boundary, *beg);
178
179                 /*
180                  * search for a zero bit fails or the rest of bitmap block
181                  * cannot contain a zero window of minimum size
182                  */
183                 if (*beg + min > boundary) {
184                         brelse(bh);
185                         return 0;
186                 }
187
188                 if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
189                         continue;
190                 /* first zero bit found; we check next bits */
191                 for (end = *beg + 1;; end++) {
192                         if (end >= *beg + max || end >= boundary
193                             || reiserfs_test_le_bit(end, bh->b_data)) {
194                                 next = end;
195                                 break;
196                         }
197
198                         /*
199                          * finding the other end of zero bit window requires
200                          * looking into journal structures (in case of
201                          * searching for free blocks for unformatted nodes)
202                          */
203                         if (unfm && is_block_in_journal(s, bmap_n, end, &next))
204                                 break;
205                 }
206
207                 /*
208                  * now (*beg) points to beginning of zero bits window,
209                  * (end) points to one bit after the window end
210                  */
211
212                 /* found window of proper size */
213                 if (end - *beg >= min) {
214                         int i;
215                         reiserfs_prepare_for_journal(s, bh, 1);
216                         /*
217                          * try to set all blocks used checking are
218                          * they still free
219                          */
220                         for (i = *beg; i < end; i++) {
221                                 /* Don't check in journal again. */
222                                 if (reiserfs_test_and_set_le_bit
223                                     (i, bh->b_data)) {
224                                         /*
225                                          * bit was set by another process while
226                                          * we slept in prepare_for_journal()
227                                          */
228                                         PROC_INFO_INC(s, scan_bitmap.stolen);
229
230                                         /*
231                                          * we can continue with smaller set
232                                          * of allocated blocks, if length of
233                                          * this set is more or equal to `min'
234                                          */
235                                         if (i >= *beg + min) {
236                                                 end = i;
237                                                 break;
238                                         }
239
240                                         /*
241                                          * otherwise we clear all bit
242                                          * were set ...
243                                          */
244                                         while (--i >= *beg)
245                                                 reiserfs_clear_le_bit
246                                                     (i, bh->b_data);
247                                         reiserfs_restore_prepared_buffer(s, bh);
248                                         *beg = org;
249
250                                         /*
251                                          * Search again in current block
252                                          * from beginning
253                                          */
254                                         goto cont;
255                                 }
256                         }
257                         bi->free_count -= (end - *beg);
258                         journal_mark_dirty(th, bh);
259                         brelse(bh);
260
261                         /* free block count calculation */
262                         reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
263                                                      1);
264                         PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
265                         journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
266
267                         return end - (*beg);
268                 } else {
269                         *beg = next;
270                 }
271         }
272 }
273
274 static int bmap_hash_id(struct super_block *s, u32 id)
275 {
276         char *hash_in = NULL;
277         unsigned long hash;
278         unsigned bm;
279
280         if (id <= 2) {
281                 bm = 1;
282         } else {
283                 hash_in = (char *)(&id);
284                 hash = keyed_hash(hash_in, 4);
285                 bm = hash % reiserfs_bmap_count(s);
286                 if (!bm)
287                         bm = 1;
288         }
289         /* this can only be true when SB_BMAP_NR = 1 */
290         if (bm >= reiserfs_bmap_count(s))
291                 bm = 0;
292         return bm;
293 }
294
295 /*
296  * hashes the id and then returns > 0 if the block group for the
297  * corresponding hash is full
298  */
299 static inline int block_group_used(struct super_block *s, u32 id)
300 {
301         int bm = bmap_hash_id(s, id);
302         struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
303
304         /*
305          * If we don't have cached information on this bitmap block, we're
306          * going to have to load it later anyway. Loading it here allows us
307          * to make a better decision. This favors long-term performance gain
308          * with a better on-disk layout vs. a short term gain of skipping the
309          * read and potentially having a bad placement.
310          */
311         if (info->free_count == UINT_MAX) {
312                 struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
313                 brelse(bh);
314         }
315
316         if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
317                 return 0;
318         }
319         return 1;
320 }
321
322 /*
323  * the packing is returned in disk byte order
324  */
325 __le32 reiserfs_choose_packing(struct inode * dir)
326 {
327         __le32 packing;
328         if (TEST_OPTION(packing_groups, dir->i_sb)) {
329                 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
330                 /*
331                  * some versions of reiserfsck expect packing locality 1 to be
332                  * special
333                  */
334                 if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
335                         packing = INODE_PKEY(dir)->k_objectid;
336                 else
337                         packing = INODE_PKEY(dir)->k_dir_id;
338         } else
339                 packing = INODE_PKEY(dir)->k_objectid;
340         return packing;
341 }
342
343 /*
344  * Tries to find contiguous zero bit window (given size) in given region of
345  * bitmap and place new blocks there. Returns number of allocated blocks.
346  */
347 static int scan_bitmap(struct reiserfs_transaction_handle *th,
348                        b_blocknr_t * start, b_blocknr_t finish,
349                        int min, int max, int unfm, sector_t file_block)
350 {
351         int nr_allocated = 0;
352         struct super_block *s = th->t_super;
353         unsigned int bm, off;
354         unsigned int end_bm, end_off;
355         unsigned int off_max = s->s_blocksize << 3;
356
357         BUG_ON(!th->t_trans_id);
358         PROC_INFO_INC(s, scan_bitmap.call);
359
360         /* No point in looking for more free blocks */
361         if (SB_FREE_BLOCKS(s) <= 0)
362                 return 0;
363
364         get_bit_address(s, *start, &bm, &off);
365         get_bit_address(s, finish, &end_bm, &end_off);
366         if (bm > reiserfs_bmap_count(s))
367                 return 0;
368         if (end_bm > reiserfs_bmap_count(s))
369                 end_bm = reiserfs_bmap_count(s);
370
371         /*
372          * When the bitmap is more than 10% free, anyone can allocate.
373          * When it's less than 10% free, only files that already use the
374          * bitmap are allowed. Once we pass 80% full, this restriction
375          * is lifted.
376          *
377          * We do this so that files that grow later still have space close to
378          * their original allocation. This improves locality, and presumably
379          * performance as a result.
380          *
381          * This is only an allocation policy and does not make up for getting a
382          * bad hint. Decent hinting must be implemented for this to work well.
383          */
384         if (TEST_OPTION(skip_busy, s)
385             && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
386                 for (; bm < end_bm; bm++, off = 0) {
387                         if ((off && (!unfm || (file_block != 0)))
388                             || SB_AP_BITMAP(s)[bm].free_count >
389                             (s->s_blocksize << 3) / 10)
390                                 nr_allocated =
391                                     scan_bitmap_block(th, bm, &off, off_max,
392                                                       min, max, unfm);
393                         if (nr_allocated)
394                                 goto ret;
395                 }
396                 /* we know from above that start is a reasonable number */
397                 get_bit_address(s, *start, &bm, &off);
398         }
399
400         for (; bm < end_bm; bm++, off = 0) {
401                 nr_allocated =
402                     scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
403                 if (nr_allocated)
404                         goto ret;
405         }
406
407         nr_allocated =
408             scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
409
410 ret:
411         *start = bm * off_max + off;
412         return nr_allocated;
413
414 }
415
416 static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
417                                  struct inode *inode, b_blocknr_t block,
418                                  int for_unformatted)
419 {
420         struct super_block *s = th->t_super;
421         struct reiserfs_super_block *rs;
422         struct buffer_head *sbh, *bmbh;
423         struct reiserfs_bitmap_info *apbi;
424         unsigned int nr, offset;
425
426         BUG_ON(!th->t_trans_id);
427         PROC_INFO_INC(s, free_block);
428         rs = SB_DISK_SUPER_BLOCK(s);
429         sbh = SB_BUFFER_WITH_SB(s);
430         apbi = SB_AP_BITMAP(s);
431
432         get_bit_address(s, block, &nr, &offset);
433
434         if (nr >= reiserfs_bmap_count(s)) {
435                 reiserfs_error(s, "vs-4075", "block %lu is out of range",
436                                block);
437                 return;
438         }
439
440         bmbh = reiserfs_read_bitmap_block(s, nr);
441         if (!bmbh)
442                 return;
443
444         reiserfs_prepare_for_journal(s, bmbh, 1);
445
446         /* clear bit for the given block in bit map */
447         if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
448                 reiserfs_error(s, "vs-4080",
449                                "block %lu: bit already cleared", block);
450         }
451         apbi[nr].free_count++;
452         journal_mark_dirty(th, bmbh);
453         brelse(bmbh);
454
455         reiserfs_prepare_for_journal(s, sbh, 1);
456         /* update super block */
457         set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
458
459         journal_mark_dirty(th, sbh);
460         if (for_unformatted) {
461                 int depth = reiserfs_write_unlock_nested(s);
462                 dquot_free_block_nodirty(inode, 1);
463                 reiserfs_write_lock_nested(s, depth);
464         }
465 }
466
467 void reiserfs_free_block(struct reiserfs_transaction_handle *th,
468                          struct inode *inode, b_blocknr_t block,
469                          int for_unformatted)
470 {
471         struct super_block *s = th->t_super;
472
473         BUG_ON(!th->t_trans_id);
474         RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
475         if (!is_reusable(s, block, 1))
476                 return;
477
478         if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
479                 reiserfs_error(th->t_super, "bitmap-4072",
480                                "Trying to free block outside file system "
481                                "boundaries (%lu > %lu)",
482                                block, sb_block_count(REISERFS_SB(s)->s_rs));
483                 return;
484         }
485         /* mark it before we clear it, just in case */
486         journal_mark_freed(th, s, block);
487         _reiserfs_free_block(th, inode, block, for_unformatted);
488 }
489
490 /* preallocated blocks don't need to be run through journal_mark_freed */
491 static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
492                                          struct inode *inode, b_blocknr_t block)
493 {
494         BUG_ON(!th->t_trans_id);
495         RFALSE(!th->t_super,
496                "vs-4060: trying to free block on nonexistent device");
497         if (!is_reusable(th->t_super, block, 1))
498                 return;
499         _reiserfs_free_block(th, inode, block, 1);
500 }
501
502 static void __discard_prealloc(struct reiserfs_transaction_handle *th,
503                                struct reiserfs_inode_info *ei)
504 {
505         unsigned long save = ei->i_prealloc_block;
506         int dirty = 0;
507         struct inode *inode = &ei->vfs_inode;
508
509         BUG_ON(!th->t_trans_id);
510 #ifdef CONFIG_REISERFS_CHECK
511         if (ei->i_prealloc_count < 0)
512                 reiserfs_error(th->t_super, "zam-4001",
513                                "inode has negative prealloc blocks count.");
514 #endif
515         while (ei->i_prealloc_count > 0) {
516                 b_blocknr_t block_to_free;
517
518                 /*
519                  * reiserfs_free_prealloc_block can drop the write lock,
520                  * which could allow another caller to free the same block.
521                  * We can protect against it by modifying the prealloc
522                  * state before calling it.
523                  */
524                 block_to_free = ei->i_prealloc_block++;
525                 ei->i_prealloc_count--;
526                 reiserfs_free_prealloc_block(th, inode, block_to_free);
527                 dirty = 1;
528         }
529         if (dirty)
530                 reiserfs_update_sd(th, inode);
531         ei->i_prealloc_block = save;
532         list_del_init(&ei->i_prealloc_list);
533 }
534
535 /* FIXME: It should be inline function */
536 void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
537                                struct inode *inode)
538 {
539         struct reiserfs_inode_info *ei = REISERFS_I(inode);
540
541         BUG_ON(!th->t_trans_id);
542         if (ei->i_prealloc_count)
543                 __discard_prealloc(th, ei);
544 }
545
546 void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
547 {
548         struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
549
550         BUG_ON(!th->t_trans_id);
551         while (!list_empty(plist)) {
552                 struct reiserfs_inode_info *ei;
553                 ei = list_entry(plist->next, struct reiserfs_inode_info,
554                                 i_prealloc_list);
555 #ifdef CONFIG_REISERFS_CHECK
556                 if (!ei->i_prealloc_count) {
557                         reiserfs_error(th->t_super, "zam-4001",
558                                        "inode is in prealloc list but has "
559                                        "no preallocated blocks.");
560                 }
561 #endif
562                 __discard_prealloc(th, ei);
563         }
564 }
565
566 void reiserfs_init_alloc_options(struct super_block *s)
567 {
568         set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
569         set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
570         set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
571 }
572
573 /* block allocator related options are parsed here */
574 int reiserfs_parse_alloc_options(struct super_block *s, char *options)
575 {
576         char *this_char, *value;
577
578         /* clear default settings */
579         REISERFS_SB(s)->s_alloc_options.bits = 0;
580
581         while ((this_char = strsep(&options, ":")) != NULL) {
582                 if ((value = strchr(this_char, '=')) != NULL)
583                         *value++ = 0;
584
585                 if (!strcmp(this_char, "concentrating_formatted_nodes")) {
586                         int temp;
587                         SET_OPTION(concentrating_formatted_nodes);
588                         temp = (value
589                                 && *value) ? simple_strtoul(value, &value,
590                                                             0) : 10;
591                         if (temp <= 0 || temp > 100) {
592                                 REISERFS_SB(s)->s_alloc_options.border = 10;
593                         } else {
594                                 REISERFS_SB(s)->s_alloc_options.border =
595                                     100 / temp;
596                         }
597                         continue;
598                 }
599                 if (!strcmp(this_char, "displacing_large_files")) {
600                         SET_OPTION(displacing_large_files);
601                         REISERFS_SB(s)->s_alloc_options.large_file_size =
602                             (value
603                              && *value) ? simple_strtoul(value, &value, 0) : 16;
604                         continue;
605                 }
606                 if (!strcmp(this_char, "displacing_new_packing_localities")) {
607                         SET_OPTION(displacing_new_packing_localities);
608                         continue;
609                 }
610
611                 if (!strcmp(this_char, "old_hashed_relocation")) {
612                         SET_OPTION(old_hashed_relocation);
613                         continue;
614                 }
615
616                 if (!strcmp(this_char, "new_hashed_relocation")) {
617                         SET_OPTION(new_hashed_relocation);
618                         continue;
619                 }
620
621                 if (!strcmp(this_char, "dirid_groups")) {
622                         SET_OPTION(dirid_groups);
623                         continue;
624                 }
625                 if (!strcmp(this_char, "oid_groups")) {
626                         SET_OPTION(oid_groups);
627                         continue;
628                 }
629                 if (!strcmp(this_char, "packing_groups")) {
630                         SET_OPTION(packing_groups);
631                         continue;
632                 }
633                 if (!strcmp(this_char, "hashed_formatted_nodes")) {
634                         SET_OPTION(hashed_formatted_nodes);
635                         continue;
636                 }
637
638                 if (!strcmp(this_char, "skip_busy")) {
639                         SET_OPTION(skip_busy);
640                         continue;
641                 }
642
643                 if (!strcmp(this_char, "hundredth_slices")) {
644                         SET_OPTION(hundredth_slices);
645                         continue;
646                 }
647
648                 if (!strcmp(this_char, "old_way")) {
649                         SET_OPTION(old_way);
650                         continue;
651                 }
652
653                 if (!strcmp(this_char, "displace_based_on_dirid")) {
654                         SET_OPTION(displace_based_on_dirid);
655                         continue;
656                 }
657
658                 if (!strcmp(this_char, "preallocmin")) {
659                         REISERFS_SB(s)->s_alloc_options.preallocmin =
660                             (value
661                              && *value) ? simple_strtoul(value, &value, 0) : 4;
662                         continue;
663                 }
664
665                 if (!strcmp(this_char, "preallocsize")) {
666                         REISERFS_SB(s)->s_alloc_options.preallocsize =
667                             (value
668                              && *value) ? simple_strtoul(value, &value,
669                                                          0) :
670                             PREALLOCATION_SIZE;
671                         continue;
672                 }
673
674                 reiserfs_warning(s, "zam-4001", "unknown option - %s",
675                                  this_char);
676                 return 1;
677         }
678
679         reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
680         return 0;
681 }
682
683 static void print_sep(struct seq_file *seq, int *first)
684 {
685         if (!*first)
686                 seq_puts(seq, ":");
687         else
688                 *first = 0;
689 }
690
691 void show_alloc_options(struct seq_file *seq, struct super_block *s)
692 {
693         int first = 1;
694
695         if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
696                 (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
697                 return;
698
699         seq_puts(seq, ",alloc=");
700
701         if (TEST_OPTION(concentrating_formatted_nodes, s)) {
702                 print_sep(seq, &first);
703                 if (REISERFS_SB(s)->s_alloc_options.border != 10) {
704                         seq_printf(seq, "concentrating_formatted_nodes=%d",
705                                 100 / REISERFS_SB(s)->s_alloc_options.border);
706                 } else
707                         seq_puts(seq, "concentrating_formatted_nodes");
708         }
709         if (TEST_OPTION(displacing_large_files, s)) {
710                 print_sep(seq, &first);
711                 if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
712                         seq_printf(seq, "displacing_large_files=%lu",
713                             REISERFS_SB(s)->s_alloc_options.large_file_size);
714                 } else
715                         seq_puts(seq, "displacing_large_files");
716         }
717         if (TEST_OPTION(displacing_new_packing_localities, s)) {
718                 print_sep(seq, &first);
719                 seq_puts(seq, "displacing_new_packing_localities");
720         }
721         if (TEST_OPTION(old_hashed_relocation, s)) {
722                 print_sep(seq, &first);
723                 seq_puts(seq, "old_hashed_relocation");
724         }
725         if (TEST_OPTION(new_hashed_relocation, s)) {
726                 print_sep(seq, &first);
727                 seq_puts(seq, "new_hashed_relocation");
728         }
729         if (TEST_OPTION(dirid_groups, s)) {
730                 print_sep(seq, &first);
731                 seq_puts(seq, "dirid_groups");
732         }
733         if (TEST_OPTION(oid_groups, s)) {
734                 print_sep(seq, &first);
735                 seq_puts(seq, "oid_groups");
736         }
737         if (TEST_OPTION(packing_groups, s)) {
738                 print_sep(seq, &first);
739                 seq_puts(seq, "packing_groups");
740         }
741         if (TEST_OPTION(hashed_formatted_nodes, s)) {
742                 print_sep(seq, &first);
743                 seq_puts(seq, "hashed_formatted_nodes");
744         }
745         if (TEST_OPTION(skip_busy, s)) {
746                 print_sep(seq, &first);
747                 seq_puts(seq, "skip_busy");
748         }
749         if (TEST_OPTION(hundredth_slices, s)) {
750                 print_sep(seq, &first);
751                 seq_puts(seq, "hundredth_slices");
752         }
753         if (TEST_OPTION(old_way, s)) {
754                 print_sep(seq, &first);
755                 seq_puts(seq, "old_way");
756         }
757         if (TEST_OPTION(displace_based_on_dirid, s)) {
758                 print_sep(seq, &first);
759                 seq_puts(seq, "displace_based_on_dirid");
760         }
761         if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
762                 print_sep(seq, &first);
763                 seq_printf(seq, "preallocmin=%d",
764                                 REISERFS_SB(s)->s_alloc_options.preallocmin);
765         }
766         if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
767                 print_sep(seq, &first);
768                 seq_printf(seq, "preallocsize=%d",
769                                 REISERFS_SB(s)->s_alloc_options.preallocsize);
770         }
771 }
772
773 static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
774 {
775         char *hash_in;
776
777         if (hint->formatted_node) {
778                 hash_in = (char *)&hint->key.k_dir_id;
779         } else {
780                 if (!hint->inode) {
781                         /*hint->search_start = hint->beg;*/
782                         hash_in = (char *)&hint->key.k_dir_id;
783                 } else
784                     if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
785                         hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
786                 else
787                         hash_in =
788                             (char *)(&INODE_PKEY(hint->inode)->k_objectid);
789         }
790
791         hint->search_start =
792             hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
793 }
794
795 /*
796  * Relocation based on dirid, hashing them into a given bitmap block
797  * files. Formatted nodes are unaffected, a separate policy covers them
798  */
799 static void dirid_groups(reiserfs_blocknr_hint_t * hint)
800 {
801         unsigned long hash;
802         __u32 dirid = 0;
803         int bm = 0;
804         struct super_block *sb = hint->th->t_super;
805
806         if (hint->inode)
807                 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
808         else if (hint->formatted_node)
809                 dirid = hint->key.k_dir_id;
810
811         if (dirid) {
812                 bm = bmap_hash_id(sb, dirid);
813                 hash = bm * (sb->s_blocksize << 3);
814                 /* give a portion of the block group to metadata */
815                 if (hint->inode)
816                         hash += sb->s_blocksize / 2;
817                 hint->search_start = hash;
818         }
819 }
820
821 /*
822  * Relocation based on oid, hashing them into a given bitmap block
823  * files. Formatted nodes are unaffected, a separate policy covers them
824  */
825 static void oid_groups(reiserfs_blocknr_hint_t * hint)
826 {
827         if (hint->inode) {
828                 unsigned long hash;
829                 __u32 oid;
830                 __u32 dirid;
831                 int bm;
832
833                 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
834
835                 /*
836                  * keep the root dir and it's first set of subdirs close to
837                  * the start of the disk
838                  */
839                 if (dirid <= 2)
840                         hash = (hint->inode->i_sb->s_blocksize << 3);
841                 else {
842                         oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
843                         bm = bmap_hash_id(hint->inode->i_sb, oid);
844                         hash = bm * (hint->inode->i_sb->s_blocksize << 3);
845                 }
846                 hint->search_start = hash;
847         }
848 }
849
850 /*
851  * returns 1 if it finds an indirect item and gets valid hint info
852  * from it, otherwise 0
853  */
854 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
855 {
856         struct treepath *path;
857         struct buffer_head *bh;
858         struct item_head *ih;
859         int pos_in_item;
860         __le32 *item;
861         int ret = 0;
862
863         /*
864          * reiserfs code can call this function w/o pointer to path
865          * structure supplied; then we rely on supplied search_start
866          */
867         if (!hint->path)
868                 return 0;
869
870         path = hint->path;
871         bh = get_last_bh(path);
872         RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
873         ih = tp_item_head(path);
874         pos_in_item = path->pos_in_item;
875         item = tp_item_body(path);
876
877         hint->search_start = bh->b_blocknr;
878
879         /*
880          * for indirect item: go to left and look for the first non-hole entry
881          * in the indirect item
882          */
883         if (!hint->formatted_node && is_indirect_le_ih(ih)) {
884                 if (pos_in_item == I_UNFM_NUM(ih))
885                         pos_in_item--;
886                 while (pos_in_item >= 0) {
887                         int t = get_block_num(item, pos_in_item);
888                         if (t) {
889                                 hint->search_start = t;
890                                 ret = 1;
891                                 break;
892                         }
893                         pos_in_item--;
894                 }
895         }
896
897         /* does result value fit into specified region? */
898         return ret;
899 }
900
901 /*
902  * should be, if formatted node, then try to put on first part of the device
903  * specified as number of percent with mount option device, else try to put
904  * on last of device.  This is not to say it is good code to do so,
905  * but the effect should be measured.
906  */
907 static inline void set_border_in_hint(struct super_block *s,
908                                       reiserfs_blocknr_hint_t * hint)
909 {
910         b_blocknr_t border =
911             SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
912
913         if (hint->formatted_node)
914                 hint->end = border - 1;
915         else
916                 hint->beg = border;
917 }
918
919 static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
920 {
921         if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
922                 hint->search_start =
923                     hint->beg +
924                     keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
925                                4) % (hint->end - hint->beg);
926         else
927                 hint->search_start =
928                     hint->beg +
929                     keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
930                                4) % (hint->end - hint->beg);
931 }
932
933 static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
934 {
935         char *hash_in;
936
937         if (!hint->inode)
938                 hash_in = (char *)&hint->key.k_dir_id;
939         else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
940                 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
941         else
942                 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
943
944         hint->search_start =
945             hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
946 }
947
948 static inline int
949 this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
950                                                    hint)
951 {
952         return hint->block ==
953             REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
954 }
955
956 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
957 static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
958 {
959         struct in_core_key *key = &hint->key;
960
961         hint->th->displace_new_blocks = 0;
962         hint->search_start =
963             hint->beg + keyed_hash((char *)(&key->k_objectid),
964                                    4) % (hint->end - hint->beg);
965 }
966 #endif
967
968 static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
969 {
970         b_blocknr_t border;
971         u32 hash_in;
972
973         if (hint->formatted_node || hint->inode == NULL) {
974                 return 0;
975         }
976
977         hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
978         border =
979             hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
980                                          4) % (hint->end - hint->beg - 1);
981         if (border > hint->search_start)
982                 hint->search_start = border;
983
984         return 1;
985 }
986
987 static inline int old_way(reiserfs_blocknr_hint_t * hint)
988 {
989         b_blocknr_t border;
990
991         if (hint->formatted_node || hint->inode == NULL) {
992                 return 0;
993         }
994
995         border =
996             hint->beg +
997             le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
998                                                               hint->beg);
999         if (border > hint->search_start)
1000                 hint->search_start = border;
1001
1002         return 1;
1003 }
1004
1005 static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
1006 {
1007         struct in_core_key *key = &hint->key;
1008         b_blocknr_t slice_start;
1009
1010         slice_start =
1011             (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
1012         if (slice_start > hint->search_start
1013             || slice_start + (hint->end / 100) <= hint->search_start) {
1014                 hint->search_start = slice_start;
1015         }
1016 }
1017
1018 static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1019                                    int amount_needed)
1020 {
1021         struct super_block *s = hint->th->t_super;
1022         int unfm_hint;
1023
1024         hint->beg = 0;
1025         hint->end = SB_BLOCK_COUNT(s) - 1;
1026
1027         /* This is former border algorithm. Now with tunable border offset */
1028         if (concentrating_formatted_nodes(s))
1029                 set_border_in_hint(s, hint);
1030
1031 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1032         /*
1033          * whenever we create a new directory, we displace it.  At first
1034          * we will hash for location, later we might look for a moderately
1035          * empty place for it
1036          */
1037         if (displacing_new_packing_localities(s)
1038             && hint->th->displace_new_blocks) {
1039                 displace_new_packing_locality(hint);
1040
1041                 /*
1042                  * we do not continue determine_search_start,
1043                  * if new packing locality is being displaced
1044                  */
1045                 return;
1046         }
1047 #endif
1048
1049         /*
1050          * all persons should feel encouraged to add more special cases
1051          * here and test them
1052          */
1053
1054         if (displacing_large_files(s) && !hint->formatted_node
1055             && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
1056                 displace_large_file(hint);
1057                 return;
1058         }
1059
1060         /*
1061          * if none of our special cases is relevant, use the left
1062          * neighbor in the tree order of the new node we are allocating for
1063          */
1064         if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1065                 hash_formatted_node(hint);
1066                 return;
1067         }
1068
1069         unfm_hint = get_left_neighbor(hint);
1070
1071         /*
1072          * Mimic old block allocator behaviour, that is if VFS allowed for
1073          * preallocation, new blocks are displaced based on directory ID.
1074          * Also, if suggested search_start is less than last preallocated
1075          * block, we start searching from it, assuming that HDD dataflow
1076          * is faster in forward direction
1077          */
1078         if (TEST_OPTION(old_way, s)) {
1079                 if (!hint->formatted_node) {
1080                         if (!reiserfs_hashed_relocation(s))
1081                                 old_way(hint);
1082                         else if (!reiserfs_no_unhashed_relocation(s))
1083                                 old_hashed_relocation(hint);
1084
1085                         if (hint->inode
1086                             && hint->search_start <
1087                             REISERFS_I(hint->inode)->i_prealloc_block)
1088                                 hint->search_start =
1089                                     REISERFS_I(hint->inode)->i_prealloc_block;
1090                 }
1091                 return;
1092         }
1093
1094         /* This is an approach proposed by Hans */
1095         if (TEST_OPTION(hundredth_slices, s)
1096             && !(displacing_large_files(s) && !hint->formatted_node)) {
1097                 hundredth_slices(hint);
1098                 return;
1099         }
1100
1101         /* old_hashed_relocation only works on unformatted */
1102         if (!unfm_hint && !hint->formatted_node &&
1103             TEST_OPTION(old_hashed_relocation, s)) {
1104                 old_hashed_relocation(hint);
1105         }
1106
1107         /* new_hashed_relocation works with both formatted/unformatted nodes */
1108         if ((!unfm_hint || hint->formatted_node) &&
1109             TEST_OPTION(new_hashed_relocation, s)) {
1110                 new_hashed_relocation(hint);
1111         }
1112
1113         /* dirid grouping works only on unformatted nodes */
1114         if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1115                 dirid_groups(hint);
1116         }
1117 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1118         if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1119                 dirid_groups(hint);
1120         }
1121 #endif
1122
1123         /* oid grouping works only on unformatted nodes */
1124         if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
1125                 oid_groups(hint);
1126         }
1127         return;
1128 }
1129
1130 static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1131 {
1132         /* make minimum size a mount option and benchmark both ways */
1133         /* we preallocate blocks only for regular files, specific size */
1134         /* benchmark preallocating always and see what happens */
1135
1136         hint->prealloc_size = 0;
1137
1138         if (!hint->formatted_node && hint->preallocate) {
1139                 if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
1140                     && hint->inode->i_size >=
1141                     REISERFS_SB(hint->th->t_super)->s_alloc_options.
1142                     preallocmin * hint->inode->i_sb->s_blocksize)
1143                         hint->prealloc_size =
1144                             REISERFS_SB(hint->th->t_super)->s_alloc_options.
1145                             preallocsize - 1;
1146         }
1147         return CARRY_ON;
1148 }
1149
1150 static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1151                                                  b_blocknr_t * new_blocknrs,
1152                                                  b_blocknr_t start,
1153                                                  b_blocknr_t finish, int min,
1154                                                  int amount_needed,
1155                                                  int prealloc_size)
1156 {
1157         int rest = amount_needed;
1158         int nr_allocated;
1159
1160         while (rest > 0 && start <= finish) {
1161                 nr_allocated = scan_bitmap(hint->th, &start, finish, min,
1162                                            rest + prealloc_size,
1163                                            !hint->formatted_node, hint->block);
1164
1165                 if (nr_allocated == 0)  /* no new blocks allocated, return */
1166                         break;
1167
1168                 /* fill free_blocknrs array first */
1169                 while (rest > 0 && nr_allocated > 0) {
1170                         *new_blocknrs++ = start++;
1171                         rest--;
1172                         nr_allocated--;
1173                 }
1174
1175                 /* do we have something to fill prealloc. array also ? */
1176                 if (nr_allocated > 0) {
1177                         /*
1178                          * it means prealloc_size was greater that 0 and
1179                          * we do preallocation
1180                          */
1181                         list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1182                                  &SB_JOURNAL(hint->th->t_super)->
1183                                  j_prealloc_list);
1184                         REISERFS_I(hint->inode)->i_prealloc_block = start;
1185                         REISERFS_I(hint->inode)->i_prealloc_count =
1186                             nr_allocated;
1187                         break;
1188                 }
1189         }
1190
1191         return (amount_needed - rest);
1192 }
1193
1194 static inline int blocknrs_and_prealloc_arrays_from_search_start
1195     (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1196      int amount_needed) {
1197         struct super_block *s = hint->th->t_super;
1198         b_blocknr_t start = hint->search_start;
1199         b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1200         int passno = 0;
1201         int nr_allocated = 0;
1202         int depth;
1203
1204         determine_prealloc_size(hint);
1205         if (!hint->formatted_node) {
1206                 int quota_ret;
1207 #ifdef REISERQUOTA_DEBUG
1208                 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1209                                "reiserquota: allocating %d blocks id=%u",
1210                                amount_needed, hint->inode->i_uid);
1211 #endif
1212                 depth = reiserfs_write_unlock_nested(s);
1213                 quota_ret =
1214                     dquot_alloc_block_nodirty(hint->inode, amount_needed);
1215                 if (quota_ret) {        /* Quota exceeded? */
1216                         reiserfs_write_lock_nested(s, depth);
1217                         return QUOTA_EXCEEDED;
1218                 }
1219                 if (hint->preallocate && hint->prealloc_size) {
1220 #ifdef REISERQUOTA_DEBUG
1221                         reiserfs_debug(s, REISERFS_DEBUG_CODE,
1222                                        "reiserquota: allocating (prealloc) %d blocks id=%u",
1223                                        hint->prealloc_size, hint->inode->i_uid);
1224 #endif
1225                         quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1226                                                          hint->prealloc_size);
1227                         if (quota_ret)
1228                                 hint->preallocate = hint->prealloc_size = 0;
1229                 }
1230                 /* for unformatted nodes, force large allocations */
1231                 reiserfs_write_lock_nested(s, depth);
1232         }
1233
1234         do {
1235                 switch (passno++) {
1236                 case 0: /* Search from hint->search_start to end of disk */
1237                         start = hint->search_start;
1238                         finish = SB_BLOCK_COUNT(s) - 1;
1239                         break;
1240                 case 1: /* Search from hint->beg to hint->search_start */
1241                         start = hint->beg;
1242                         finish = hint->search_start;
1243                         break;
1244                 case 2: /* Last chance: Search from 0 to hint->beg */
1245                         start = 0;
1246                         finish = hint->beg;
1247                         break;
1248                 default:
1249                         /* We've tried searching everywhere, not enough space */
1250                         /* Free the blocks */
1251                         if (!hint->formatted_node) {
1252 #ifdef REISERQUOTA_DEBUG
1253                                 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1254                                                "reiserquota: freeing (nospace) %d blocks id=%u",
1255                                                amount_needed +
1256                                                hint->prealloc_size -
1257                                                nr_allocated,
1258                                                hint->inode->i_uid);
1259 #endif
1260                                 /* Free not allocated blocks */
1261                                 depth = reiserfs_write_unlock_nested(s);
1262                                 dquot_free_block_nodirty(hint->inode,
1263                                         amount_needed + hint->prealloc_size -
1264                                         nr_allocated);
1265                                 reiserfs_write_lock_nested(s, depth);
1266                         }
1267                         while (nr_allocated--)
1268                                 reiserfs_free_block(hint->th, hint->inode,
1269                                                     new_blocknrs[nr_allocated],
1270                                                     !hint->formatted_node);
1271
1272                         return NO_DISK_SPACE;
1273                 }
1274         } while ((nr_allocated += allocate_without_wrapping_disk(hint,
1275                                                                  new_blocknrs +
1276                                                                  nr_allocated,
1277                                                                  start, finish,
1278                                                                  1,
1279                                                                  amount_needed -
1280                                                                  nr_allocated,
1281                                                                  hint->
1282                                                                  prealloc_size))
1283                  < amount_needed);
1284         if (!hint->formatted_node &&
1285             amount_needed + hint->prealloc_size >
1286             nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1287                 /* Some of preallocation blocks were not allocated */
1288 #ifdef REISERQUOTA_DEBUG
1289                 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1290                                "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1291                                amount_needed + hint->prealloc_size -
1292                                nr_allocated -
1293                                REISERFS_I(hint->inode)->i_prealloc_count,
1294                                hint->inode->i_uid);
1295 #endif
1296
1297                 depth = reiserfs_write_unlock_nested(s);
1298                 dquot_free_block_nodirty(hint->inode, amount_needed +
1299                                          hint->prealloc_size - nr_allocated -
1300                                          REISERFS_I(hint->inode)->
1301                                          i_prealloc_count);
1302                 reiserfs_write_lock_nested(s, depth);
1303         }
1304
1305         return CARRY_ON;
1306 }
1307
1308 /* grab new blocknrs from preallocated list */
1309 /* return amount still needed after using them */
1310 static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1311                                               b_blocknr_t * new_blocknrs,
1312                                               int amount_needed)
1313 {
1314         struct inode *inode = hint->inode;
1315
1316         if (REISERFS_I(inode)->i_prealloc_count > 0) {
1317                 while (amount_needed) {
1318
1319                         *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1320                         REISERFS_I(inode)->i_prealloc_count--;
1321
1322                         amount_needed--;
1323
1324                         if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1325                                 list_del(&REISERFS_I(inode)->i_prealloc_list);
1326                                 break;
1327                         }
1328                 }
1329         }
1330         /* return amount still needed after using preallocated blocks */
1331         return amount_needed;
1332 }
1333
1334 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1335                                b_blocknr_t *new_blocknrs,
1336                                int amount_needed,
1337                                /* Amount of blocks we have already reserved */
1338                                int reserved_by_us)
1339 {
1340         int initial_amount_needed = amount_needed;
1341         int ret;
1342         struct super_block *s = hint->th->t_super;
1343
1344         /* Check if there is enough space, taking into account reserved space */
1345         if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1346             amount_needed - reserved_by_us)
1347                 return NO_DISK_SPACE;
1348         /* should this be if !hint->inode &&  hint->preallocate? */
1349         /* do you mean hint->formatted_node can be removed ? - Zam */
1350         /*
1351          * hint->formatted_node cannot be removed because we try to access
1352          * inode information here, and there is often no inode associated with
1353          * metadata allocations - green
1354          */
1355
1356         if (!hint->formatted_node && hint->preallocate) {
1357                 amount_needed = use_preallocated_list_if_available
1358                     (hint, new_blocknrs, amount_needed);
1359
1360                 /*
1361                  * We have all the block numbers we need from the
1362                  * prealloc list
1363                  */
1364                 if (amount_needed == 0)
1365                         return CARRY_ON;
1366                 new_blocknrs += (initial_amount_needed - amount_needed);
1367         }
1368
1369         /* find search start and save it in hint structure */
1370         determine_search_start(hint, amount_needed);
1371         if (hint->search_start >= SB_BLOCK_COUNT(s))
1372                 hint->search_start = SB_BLOCK_COUNT(s) - 1;
1373
1374         /* allocation itself; fill new_blocknrs and preallocation arrays */
1375         ret = blocknrs_and_prealloc_arrays_from_search_start
1376             (hint, new_blocknrs, amount_needed);
1377
1378         /*
1379          * We used prealloc. list to fill (partially) new_blocknrs array.
1380          * If final allocation fails we need to return blocks back to
1381          * prealloc. list or just free them. -- Zam (I chose second
1382          * variant)
1383          */
1384         if (ret != CARRY_ON) {
1385                 while (amount_needed++ < initial_amount_needed) {
1386                         reiserfs_free_block(hint->th, hint->inode,
1387                                             *(--new_blocknrs), 1);
1388                 }
1389         }
1390         return ret;
1391 }
1392
1393 void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1394                                     struct buffer_head *bh,
1395                                     struct reiserfs_bitmap_info *info)
1396 {
1397         unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1398
1399         /* The first bit must ALWAYS be 1 */
1400         if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1401                 reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1402                                "corrupted: first bit must be 1", bh->b_blocknr);
1403
1404         info->free_count = 0;
1405
1406         while (--cur >= (unsigned long *)bh->b_data) {
1407                 /* 0 and ~0 are special, we can optimize for them */
1408                 if (*cur == 0)
1409                         info->free_count += BITS_PER_LONG;
1410                 else if (*cur != ~0L)   /* A mix, investigate */
1411                         info->free_count += BITS_PER_LONG - hweight_long(*cur);
1412         }
1413 }
1414
1415 struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1416                                                unsigned int bitmap)
1417 {
1418         b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
1419         struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1420         struct buffer_head *bh;
1421
1422         /*
1423          * Way old format filesystems had the bitmaps packed up front.
1424          * I doubt there are any of these left, but just in case...
1425          */
1426         if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1427                               &REISERFS_SB(sb)->s_properties)))
1428                 block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1429         else if (bitmap == 0)
1430                 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1431
1432         bh = sb_bread(sb, block);
1433         if (bh == NULL)
1434                 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1435                                  "reading failed", __func__, block);
1436         else {
1437                 if (buffer_locked(bh)) {
1438                         int depth;
1439                         PROC_INFO_INC(sb, scan_bitmap.wait);
1440                         depth = reiserfs_write_unlock_nested(sb);
1441                         __wait_on_buffer(bh);
1442                         reiserfs_write_lock_nested(sb, depth);
1443                 }
1444                 BUG_ON(!buffer_uptodate(bh));
1445                 BUG_ON(atomic_read(&bh->b_count) == 0);
1446
1447                 if (info->free_count == UINT_MAX)
1448                         reiserfs_cache_bitmap_metadata(sb, bh, info);
1449         }
1450
1451         return bh;
1452 }
1453
1454 int reiserfs_init_bitmap_cache(struct super_block *sb)
1455 {
1456         struct reiserfs_bitmap_info *bitmap;
1457         unsigned int bmap_nr = reiserfs_bmap_count(sb);
1458
1459         bitmap = vmalloc(array_size(bmap_nr, sizeof(*bitmap)));
1460         if (bitmap == NULL)
1461                 return -ENOMEM;
1462
1463         memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
1464
1465         SB_AP_BITMAP(sb) = bitmap;
1466
1467         return 0;
1468 }
1469
1470 void reiserfs_free_bitmap_cache(struct super_block *sb)
1471 {
1472         if (SB_AP_BITMAP(sb)) {
1473                 vfree(SB_AP_BITMAP(sb));
1474                 SB_AP_BITMAP(sb) = NULL;
1475         }
1476 }