Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
[sfrench/cifs-2.6.git] / fs / mpage.c
1 /*
2  * fs/mpage.c
3  *
4  * Copyright (C) 2002, Linus Torvalds.
5  *
6  * Contains functions related to preparing and submitting BIOs which contain
7  * multiple pagecache pages.
8  *
9  * 15May2002    Andrew Morton
10  *              Initial version
11  * 27Jun2002    axboe@suse.de
12  *              use bio_add_page() to build bio's just the right size
13  */
14
15 #include <linux/kernel.h>
16 #include <linux/export.h>
17 #include <linux/mm.h>
18 #include <linux/kdev_t.h>
19 #include <linux/gfp.h>
20 #include <linux/bio.h>
21 #include <linux/fs.h>
22 #include <linux/buffer_head.h>
23 #include <linux/blkdev.h>
24 #include <linux/highmem.h>
25 #include <linux/prefetch.h>
26 #include <linux/mpage.h>
27 #include <linux/mm_inline.h>
28 #include <linux/writeback.h>
29 #include <linux/backing-dev.h>
30 #include <linux/pagevec.h>
31 #include <linux/cleancache.h>
32 #include "internal.h"
33
34 /*
35  * I/O completion handler for multipage BIOs.
36  *
37  * The mpage code never puts partial pages into a BIO (except for end-of-file).
38  * If a page does not map to a contiguous run of blocks then it simply falls
39  * back to block_read_full_page().
40  *
41  * Why is this?  If a page's completion depends on a number of different BIOs
42  * which can complete in any order (or at the same time) then determining the
43  * status of that page is hard.  See end_buffer_async_read() for the details.
44  * There is no point in duplicating all that complexity.
45  */
46 static void mpage_end_io(struct bio *bio)
47 {
48         struct bio_vec *bv;
49         int i;
50
51         bio_for_each_segment_all(bv, bio, i) {
52                 struct page *page = bv->bv_page;
53                 page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
54         }
55
56         bio_put(bio);
57 }
58
59 static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
60 {
61         bio->bi_end_io = mpage_end_io;
62         bio_set_op_attrs(bio, op, op_flags);
63         guard_bio_eod(op, bio);
64         submit_bio(bio);
65         return NULL;
66 }
67
68 static struct bio *
69 mpage_alloc(struct block_device *bdev,
70                 sector_t first_sector, int nr_vecs,
71                 gfp_t gfp_flags)
72 {
73         struct bio *bio;
74
75         /* Restrict the given (page cache) mask for slab allocations */
76         gfp_flags &= GFP_KERNEL;
77         bio = bio_alloc(gfp_flags, nr_vecs);
78
79         if (bio == NULL && (current->flags & PF_MEMALLOC)) {
80                 while (!bio && (nr_vecs /= 2))
81                         bio = bio_alloc(gfp_flags, nr_vecs);
82         }
83
84         if (bio) {
85                 bio->bi_bdev = bdev;
86                 bio->bi_iter.bi_sector = first_sector;
87         }
88         return bio;
89 }
90
91 /*
92  * support function for mpage_readpages.  The fs supplied get_block might
93  * return an up to date buffer.  This is used to map that buffer into
94  * the page, which allows readpage to avoid triggering a duplicate call
95  * to get_block.
96  *
97  * The idea is to avoid adding buffers to pages that don't already have
98  * them.  So when the buffer is up to date and the page size == block size,
99  * this marks the page up to date instead of adding new buffers.
100  */
101 static void 
102 map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) 
103 {
104         struct inode *inode = page->mapping->host;
105         struct buffer_head *page_bh, *head;
106         int block = 0;
107
108         if (!page_has_buffers(page)) {
109                 /*
110                  * don't make any buffers if there is only one buffer on
111                  * the page and the page just needs to be set up to date
112                  */
113                 if (inode->i_blkbits == PAGE_SHIFT &&
114                     buffer_uptodate(bh)) {
115                         SetPageUptodate(page);    
116                         return;
117                 }
118                 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
119         }
120         head = page_buffers(page);
121         page_bh = head;
122         do {
123                 if (block == page_block) {
124                         page_bh->b_state = bh->b_state;
125                         page_bh->b_bdev = bh->b_bdev;
126                         page_bh->b_blocknr = bh->b_blocknr;
127                         break;
128                 }
129                 page_bh = page_bh->b_this_page;
130                 block++;
131         } while (page_bh != head);
132 }
133
134 /*
135  * This is the worker routine which does all the work of mapping the disk
136  * blocks and constructs largest possible bios, submits them for IO if the
137  * blocks are not contiguous on the disk.
138  *
139  * We pass a buffer_head back and forth and use its buffer_mapped() flag to
140  * represent the validity of its disk mapping and to decide when to do the next
141  * get_block() call.
142  */
143 static struct bio *
144 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
145                 sector_t *last_block_in_bio, struct buffer_head *map_bh,
146                 unsigned long *first_logical_block, get_block_t get_block,
147                 gfp_t gfp)
148 {
149         struct inode *inode = page->mapping->host;
150         const unsigned blkbits = inode->i_blkbits;
151         const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
152         const unsigned blocksize = 1 << blkbits;
153         sector_t block_in_file;
154         sector_t last_block;
155         sector_t last_block_in_file;
156         sector_t blocks[MAX_BUF_PER_PAGE];
157         unsigned page_block;
158         unsigned first_hole = blocks_per_page;
159         struct block_device *bdev = NULL;
160         int length;
161         int fully_mapped = 1;
162         unsigned nblocks;
163         unsigned relative_block;
164
165         if (page_has_buffers(page))
166                 goto confused;
167
168         block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
169         last_block = block_in_file + nr_pages * blocks_per_page;
170         last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
171         if (last_block > last_block_in_file)
172                 last_block = last_block_in_file;
173         page_block = 0;
174
175         /*
176          * Map blocks using the result from the previous get_blocks call first.
177          */
178         nblocks = map_bh->b_size >> blkbits;
179         if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
180                         block_in_file < (*first_logical_block + nblocks)) {
181                 unsigned map_offset = block_in_file - *first_logical_block;
182                 unsigned last = nblocks - map_offset;
183
184                 for (relative_block = 0; ; relative_block++) {
185                         if (relative_block == last) {
186                                 clear_buffer_mapped(map_bh);
187                                 break;
188                         }
189                         if (page_block == blocks_per_page)
190                                 break;
191                         blocks[page_block] = map_bh->b_blocknr + map_offset +
192                                                 relative_block;
193                         page_block++;
194                         block_in_file++;
195                 }
196                 bdev = map_bh->b_bdev;
197         }
198
199         /*
200          * Then do more get_blocks calls until we are done with this page.
201          */
202         map_bh->b_page = page;
203         while (page_block < blocks_per_page) {
204                 map_bh->b_state = 0;
205                 map_bh->b_size = 0;
206
207                 if (block_in_file < last_block) {
208                         map_bh->b_size = (last_block-block_in_file) << blkbits;
209                         if (get_block(inode, block_in_file, map_bh, 0))
210                                 goto confused;
211                         *first_logical_block = block_in_file;
212                 }
213
214                 if (!buffer_mapped(map_bh)) {
215                         fully_mapped = 0;
216                         if (first_hole == blocks_per_page)
217                                 first_hole = page_block;
218                         page_block++;
219                         block_in_file++;
220                         continue;
221                 }
222
223                 /* some filesystems will copy data into the page during
224                  * the get_block call, in which case we don't want to
225                  * read it again.  map_buffer_to_page copies the data
226                  * we just collected from get_block into the page's buffers
227                  * so readpage doesn't have to repeat the get_block call
228                  */
229                 if (buffer_uptodate(map_bh)) {
230                         map_buffer_to_page(page, map_bh, page_block);
231                         goto confused;
232                 }
233         
234                 if (first_hole != blocks_per_page)
235                         goto confused;          /* hole -> non-hole */
236
237                 /* Contiguous blocks? */
238                 if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
239                         goto confused;
240                 nblocks = map_bh->b_size >> blkbits;
241                 for (relative_block = 0; ; relative_block++) {
242                         if (relative_block == nblocks) {
243                                 clear_buffer_mapped(map_bh);
244                                 break;
245                         } else if (page_block == blocks_per_page)
246                                 break;
247                         blocks[page_block] = map_bh->b_blocknr+relative_block;
248                         page_block++;
249                         block_in_file++;
250                 }
251                 bdev = map_bh->b_bdev;
252         }
253
254         if (first_hole != blocks_per_page) {
255                 zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
256                 if (first_hole == 0) {
257                         SetPageUptodate(page);
258                         unlock_page(page);
259                         goto out;
260                 }
261         } else if (fully_mapped) {
262                 SetPageMappedToDisk(page);
263         }
264
265         if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
266             cleancache_get_page(page) == 0) {
267                 SetPageUptodate(page);
268                 goto confused;
269         }
270
271         /*
272          * This page will go to BIO.  Do we need to send this BIO off first?
273          */
274         if (bio && (*last_block_in_bio != blocks[0] - 1))
275                 bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
276
277 alloc_new:
278         if (bio == NULL) {
279                 if (first_hole == blocks_per_page) {
280                         if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
281                                                                 page))
282                                 goto out;
283                 }
284                 bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
285                                 min_t(int, nr_pages, BIO_MAX_PAGES), gfp);
286                 if (bio == NULL)
287                         goto confused;
288         }
289
290         length = first_hole << blkbits;
291         if (bio_add_page(bio, page, length, 0) < length) {
292                 bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
293                 goto alloc_new;
294         }
295
296         relative_block = block_in_file - *first_logical_block;
297         nblocks = map_bh->b_size >> blkbits;
298         if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
299             (first_hole != blocks_per_page))
300                 bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
301         else
302                 *last_block_in_bio = blocks[blocks_per_page - 1];
303 out:
304         return bio;
305
306 confused:
307         if (bio)
308                 bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
309         if (!PageUptodate(page))
310                 block_read_full_page(page, get_block);
311         else
312                 unlock_page(page);
313         goto out;
314 }
315
316 /**
317  * mpage_readpages - populate an address space with some pages & start reads against them
318  * @mapping: the address_space
319  * @pages: The address of a list_head which contains the target pages.  These
320  *   pages have their ->index populated and are otherwise uninitialised.
321  *   The page at @pages->prev has the lowest file offset, and reads should be
322  *   issued in @pages->prev to @pages->next order.
323  * @nr_pages: The number of pages at *@pages
324  * @get_block: The filesystem's block mapper function.
325  *
326  * This function walks the pages and the blocks within each page, building and
327  * emitting large BIOs.
328  *
329  * If anything unusual happens, such as:
330  *
331  * - encountering a page which has buffers
332  * - encountering a page which has a non-hole after a hole
333  * - encountering a page with non-contiguous blocks
334  *
335  * then this code just gives up and calls the buffer_head-based read function.
336  * It does handle a page which has holes at the end - that is a common case:
337  * the end-of-file on blocksize < PAGE_SIZE setups.
338  *
339  * BH_Boundary explanation:
340  *
341  * There is a problem.  The mpage read code assembles several pages, gets all
342  * their disk mappings, and then submits them all.  That's fine, but obtaining
343  * the disk mappings may require I/O.  Reads of indirect blocks, for example.
344  *
345  * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
346  * submitted in the following order:
347  *      12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
348  *
349  * because the indirect block has to be read to get the mappings of blocks
350  * 13,14,15,16.  Obviously, this impacts performance.
351  *
352  * So what we do it to allow the filesystem's get_block() function to set
353  * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
354  * after this one will require I/O against a block which is probably close to
355  * this one.  So you should push what I/O you have currently accumulated.
356  *
357  * This all causes the disk requests to be issued in the correct order.
358  */
359 int
360 mpage_readpages(struct address_space *mapping, struct list_head *pages,
361                                 unsigned nr_pages, get_block_t get_block)
362 {
363         struct bio *bio = NULL;
364         unsigned page_idx;
365         sector_t last_block_in_bio = 0;
366         struct buffer_head map_bh;
367         unsigned long first_logical_block = 0;
368         gfp_t gfp = readahead_gfp_mask(mapping);
369
370         map_bh.b_state = 0;
371         map_bh.b_size = 0;
372         for (page_idx = 0; page_idx < nr_pages; page_idx++) {
373                 struct page *page = lru_to_page(pages);
374
375                 prefetchw(&page->flags);
376                 list_del(&page->lru);
377                 if (!add_to_page_cache_lru(page, mapping,
378                                         page->index,
379                                         gfp)) {
380                         bio = do_mpage_readpage(bio, page,
381                                         nr_pages - page_idx,
382                                         &last_block_in_bio, &map_bh,
383                                         &first_logical_block,
384                                         get_block, gfp);
385                 }
386                 put_page(page);
387         }
388         BUG_ON(!list_empty(pages));
389         if (bio)
390                 mpage_bio_submit(REQ_OP_READ, 0, bio);
391         return 0;
392 }
393 EXPORT_SYMBOL(mpage_readpages);
394
395 /*
396  * This isn't called much at all
397  */
398 int mpage_readpage(struct page *page, get_block_t get_block)
399 {
400         struct bio *bio = NULL;
401         sector_t last_block_in_bio = 0;
402         struct buffer_head map_bh;
403         unsigned long first_logical_block = 0;
404         gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
405
406         map_bh.b_state = 0;
407         map_bh.b_size = 0;
408         bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
409                         &map_bh, &first_logical_block, get_block, gfp);
410         if (bio)
411                 mpage_bio_submit(REQ_OP_READ, 0, bio);
412         return 0;
413 }
414 EXPORT_SYMBOL(mpage_readpage);
415
416 /*
417  * Writing is not so simple.
418  *
419  * If the page has buffers then they will be used for obtaining the disk
420  * mapping.  We only support pages which are fully mapped-and-dirty, with a
421  * special case for pages which are unmapped at the end: end-of-file.
422  *
423  * If the page has no buffers (preferred) then the page is mapped here.
424  *
425  * If all blocks are found to be contiguous then the page can go into the
426  * BIO.  Otherwise fall back to the mapping's writepage().
427  * 
428  * FIXME: This code wants an estimate of how many pages are still to be
429  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
430  * just allocate full-size (16-page) BIOs.
431  */
432
433 struct mpage_data {
434         struct bio *bio;
435         sector_t last_block_in_bio;
436         get_block_t *get_block;
437         unsigned use_writepage;
438 };
439
440 /*
441  * We have our BIO, so we can now mark the buffers clean.  Make
442  * sure to only clean buffers which we know we'll be writing.
443  */
444 static void clean_buffers(struct page *page, unsigned first_unmapped)
445 {
446         unsigned buffer_counter = 0;
447         struct buffer_head *bh, *head;
448         if (!page_has_buffers(page))
449                 return;
450         head = page_buffers(page);
451         bh = head;
452
453         do {
454                 if (buffer_counter++ == first_unmapped)
455                         break;
456                 clear_buffer_dirty(bh);
457                 bh = bh->b_this_page;
458         } while (bh != head);
459
460         /*
461          * we cannot drop the bh if the page is not uptodate or a concurrent
462          * readpage would fail to serialize with the bh and it would read from
463          * disk before we reach the platter.
464          */
465         if (buffer_heads_over_limit && PageUptodate(page))
466                 try_to_free_buffers(page);
467 }
468
469 static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
470                       void *data)
471 {
472         struct mpage_data *mpd = data;
473         struct bio *bio = mpd->bio;
474         struct address_space *mapping = page->mapping;
475         struct inode *inode = page->mapping->host;
476         const unsigned blkbits = inode->i_blkbits;
477         unsigned long end_index;
478         const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
479         sector_t last_block;
480         sector_t block_in_file;
481         sector_t blocks[MAX_BUF_PER_PAGE];
482         unsigned page_block;
483         unsigned first_unmapped = blocks_per_page;
484         struct block_device *bdev = NULL;
485         int boundary = 0;
486         sector_t boundary_block = 0;
487         struct block_device *boundary_bdev = NULL;
488         int length;
489         struct buffer_head map_bh;
490         loff_t i_size = i_size_read(inode);
491         int ret = 0;
492         int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : 0);
493
494         if (page_has_buffers(page)) {
495                 struct buffer_head *head = page_buffers(page);
496                 struct buffer_head *bh = head;
497
498                 /* If they're all mapped and dirty, do it */
499                 page_block = 0;
500                 do {
501                         BUG_ON(buffer_locked(bh));
502                         if (!buffer_mapped(bh)) {
503                                 /*
504                                  * unmapped dirty buffers are created by
505                                  * __set_page_dirty_buffers -> mmapped data
506                                  */
507                                 if (buffer_dirty(bh))
508                                         goto confused;
509                                 if (first_unmapped == blocks_per_page)
510                                         first_unmapped = page_block;
511                                 continue;
512                         }
513
514                         if (first_unmapped != blocks_per_page)
515                                 goto confused;  /* hole -> non-hole */
516
517                         if (!buffer_dirty(bh) || !buffer_uptodate(bh))
518                                 goto confused;
519                         if (page_block) {
520                                 if (bh->b_blocknr != blocks[page_block-1] + 1)
521                                         goto confused;
522                         }
523                         blocks[page_block++] = bh->b_blocknr;
524                         boundary = buffer_boundary(bh);
525                         if (boundary) {
526                                 boundary_block = bh->b_blocknr;
527                                 boundary_bdev = bh->b_bdev;
528                         }
529                         bdev = bh->b_bdev;
530                 } while ((bh = bh->b_this_page) != head);
531
532                 if (first_unmapped)
533                         goto page_is_mapped;
534
535                 /*
536                  * Page has buffers, but they are all unmapped. The page was
537                  * created by pagein or read over a hole which was handled by
538                  * block_read_full_page().  If this address_space is also
539                  * using mpage_readpages then this can rarely happen.
540                  */
541                 goto confused;
542         }
543
544         /*
545          * The page has no buffers: map it to disk
546          */
547         BUG_ON(!PageUptodate(page));
548         block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
549         last_block = (i_size - 1) >> blkbits;
550         map_bh.b_page = page;
551         for (page_block = 0; page_block < blocks_per_page; ) {
552
553                 map_bh.b_state = 0;
554                 map_bh.b_size = 1 << blkbits;
555                 if (mpd->get_block(inode, block_in_file, &map_bh, 1))
556                         goto confused;
557                 if (buffer_new(&map_bh))
558                         unmap_underlying_metadata(map_bh.b_bdev,
559                                                 map_bh.b_blocknr);
560                 if (buffer_boundary(&map_bh)) {
561                         boundary_block = map_bh.b_blocknr;
562                         boundary_bdev = map_bh.b_bdev;
563                 }
564                 if (page_block) {
565                         if (map_bh.b_blocknr != blocks[page_block-1] + 1)
566                                 goto confused;
567                 }
568                 blocks[page_block++] = map_bh.b_blocknr;
569                 boundary = buffer_boundary(&map_bh);
570                 bdev = map_bh.b_bdev;
571                 if (block_in_file == last_block)
572                         break;
573                 block_in_file++;
574         }
575         BUG_ON(page_block == 0);
576
577         first_unmapped = page_block;
578
579 page_is_mapped:
580         end_index = i_size >> PAGE_SHIFT;
581         if (page->index >= end_index) {
582                 /*
583                  * The page straddles i_size.  It must be zeroed out on each
584                  * and every writepage invocation because it may be mmapped.
585                  * "A file is mapped in multiples of the page size.  For a file
586                  * that is not a multiple of the page size, the remaining memory
587                  * is zeroed when mapped, and writes to that region are not
588                  * written out to the file."
589                  */
590                 unsigned offset = i_size & (PAGE_SIZE - 1);
591
592                 if (page->index > end_index || !offset)
593                         goto confused;
594                 zero_user_segment(page, offset, PAGE_SIZE);
595         }
596
597         /*
598          * This page will go to BIO.  Do we need to send this BIO off first?
599          */
600         if (bio && mpd->last_block_in_bio != blocks[0] - 1)
601                 bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
602
603 alloc_new:
604         if (bio == NULL) {
605                 if (first_unmapped == blocks_per_page) {
606                         if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
607                                                                 page, wbc)) {
608                                 clean_buffers(page, first_unmapped);
609                                 goto out;
610                         }
611                 }
612                 bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
613                                 BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
614                 if (bio == NULL)
615                         goto confused;
616
617                 wbc_init_bio(wbc, bio);
618         }
619
620         /*
621          * Must try to add the page before marking the buffer clean or
622          * the confused fail path above (OOM) will be very confused when
623          * it finds all bh marked clean (i.e. it will not write anything)
624          */
625         wbc_account_io(wbc, page, PAGE_SIZE);
626         length = first_unmapped << blkbits;
627         if (bio_add_page(bio, page, length, 0) < length) {
628                 bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
629                 goto alloc_new;
630         }
631
632         clean_buffers(page, first_unmapped);
633
634         BUG_ON(PageWriteback(page));
635         set_page_writeback(page);
636         unlock_page(page);
637         if (boundary || (first_unmapped != blocks_per_page)) {
638                 bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
639                 if (boundary_block) {
640                         write_boundary_block(boundary_bdev,
641                                         boundary_block, 1 << blkbits);
642                 }
643         } else {
644                 mpd->last_block_in_bio = blocks[blocks_per_page - 1];
645         }
646         goto out;
647
648 confused:
649         if (bio)
650                 bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
651
652         if (mpd->use_writepage) {
653                 ret = mapping->a_ops->writepage(page, wbc);
654         } else {
655                 ret = -EAGAIN;
656                 goto out;
657         }
658         /*
659          * The caller has a ref on the inode, so *mapping is stable
660          */
661         mapping_set_error(mapping, ret);
662 out:
663         mpd->bio = bio;
664         return ret;
665 }
666
667 /**
668  * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
669  * @mapping: address space structure to write
670  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
671  * @get_block: the filesystem's block mapper function.
672  *             If this is NULL then use a_ops->writepage.  Otherwise, go
673  *             direct-to-BIO.
674  *
675  * This is a library function, which implements the writepages()
676  * address_space_operation.
677  *
678  * If a page is already under I/O, generic_writepages() skips it, even
679  * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
680  * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
681  * and msync() need to guarantee that all the data which was dirty at the time
682  * the call was made get new I/O started against them.  If wbc->sync_mode is
683  * WB_SYNC_ALL then we were called for data integrity and we must wait for
684  * existing IO to complete.
685  */
686 int
687 mpage_writepages(struct address_space *mapping,
688                 struct writeback_control *wbc, get_block_t get_block)
689 {
690         struct blk_plug plug;
691         int ret;
692
693         blk_start_plug(&plug);
694
695         if (!get_block)
696                 ret = generic_writepages(mapping, wbc);
697         else {
698                 struct mpage_data mpd = {
699                         .bio = NULL,
700                         .last_block_in_bio = 0,
701                         .get_block = get_block,
702                         .use_writepage = 1,
703                 };
704
705                 ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
706                 if (mpd.bio) {
707                         int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
708                                   WRITE_SYNC : 0);
709                         mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
710                 }
711         }
712         blk_finish_plug(&plug);
713         return ret;
714 }
715 EXPORT_SYMBOL(mpage_writepages);
716
717 int mpage_writepage(struct page *page, get_block_t get_block,
718         struct writeback_control *wbc)
719 {
720         struct mpage_data mpd = {
721                 .bio = NULL,
722                 .last_block_in_bio = 0,
723                 .get_block = get_block,
724                 .use_writepage = 0,
725         };
726         int ret = __mpage_writepage(page, wbc, &mpd);
727         if (mpd.bio) {
728                 int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
729                           WRITE_SYNC : 0);
730                 mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
731         }
732         return ret;
733 }
734 EXPORT_SYMBOL(mpage_writepage);