Merge branch 'for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
[sfrench/cifs-2.6.git] / block / blk-lib.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Generic helper functions for the block layer
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/scatterlist.h>
10
11 #include "blk.h"
12
13 static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
14                 gfp_t gfp)
15 {
16         struct bio *new = bio_alloc(gfp, nr_pages);
17
18         if (bio) {
19                 bio_chain(bio, new);
20                 submit_bio(bio);
21         }
22
23         return new;
24 }
25
26 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
27                 sector_t nr_sects, gfp_t gfp_mask, int flags,
28                 struct bio **biop)
29 {
30         struct request_queue *q = bdev_get_queue(bdev);
31         struct bio *bio = *biop;
32         unsigned int granularity;
33         unsigned int op;
34         int alignment;
35         sector_t bs_mask;
36
37         if (!q)
38                 return -ENXIO;
39
40         if (bdev_read_only(bdev))
41                 return -EPERM;
42
43         if (flags & BLKDEV_DISCARD_SECURE) {
44                 if (!blk_queue_secure_erase(q))
45                         return -EOPNOTSUPP;
46                 op = REQ_OP_SECURE_ERASE;
47         } else {
48                 if (!blk_queue_discard(q))
49                         return -EOPNOTSUPP;
50                 op = REQ_OP_DISCARD;
51         }
52
53         bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
54         if ((sector | nr_sects) & bs_mask)
55                 return -EINVAL;
56
57         /* Zero-sector (unknown) and one-sector granularities are the same.  */
58         granularity = max(q->limits.discard_granularity >> 9, 1U);
59         alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
60
61         while (nr_sects) {
62                 unsigned int req_sects;
63                 sector_t end_sect, tmp;
64
65                 /* Make sure bi_size doesn't overflow */
66                 req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
67
68                 /**
69                  * If splitting a request, and the next starting sector would be
70                  * misaligned, stop the discard at the previous aligned sector.
71                  */
72                 end_sect = sector + req_sects;
73                 tmp = end_sect;
74                 if (req_sects < nr_sects &&
75                     sector_div(tmp, granularity) != alignment) {
76                         end_sect = end_sect - alignment;
77                         sector_div(end_sect, granularity);
78                         end_sect = end_sect * granularity + alignment;
79                         req_sects = end_sect - sector;
80                 }
81
82                 bio = next_bio(bio, 0, gfp_mask);
83                 bio->bi_iter.bi_sector = sector;
84                 bio_set_dev(bio, bdev);
85                 bio_set_op_attrs(bio, op, 0);
86
87                 bio->bi_iter.bi_size = req_sects << 9;
88                 nr_sects -= req_sects;
89                 sector = end_sect;
90
91                 /*
92                  * We can loop for a long time in here, if someone does
93                  * full device discards (like mkfs). Be nice and allow
94                  * us to schedule out to avoid softlocking if preempt
95                  * is disabled.
96                  */
97                 cond_resched();
98         }
99
100         *biop = bio;
101         return 0;
102 }
103 EXPORT_SYMBOL(__blkdev_issue_discard);
104
105 /**
106  * blkdev_issue_discard - queue a discard
107  * @bdev:       blockdev to issue discard for
108  * @sector:     start sector
109  * @nr_sects:   number of sectors to discard
110  * @gfp_mask:   memory allocation flags (for bio_alloc)
111  * @flags:      BLKDEV_DISCARD_* flags to control behaviour
112  *
113  * Description:
114  *    Issue a discard request for the sectors in question.
115  */
116 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
117                 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
118 {
119         struct bio *bio = NULL;
120         struct blk_plug plug;
121         int ret;
122
123         blk_start_plug(&plug);
124         ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
125                         &bio);
126         if (!ret && bio) {
127                 ret = submit_bio_wait(bio);
128                 if (ret == -EOPNOTSUPP)
129                         ret = 0;
130                 bio_put(bio);
131         }
132         blk_finish_plug(&plug);
133
134         return ret;
135 }
136 EXPORT_SYMBOL(blkdev_issue_discard);
137
138 /**
139  * __blkdev_issue_write_same - generate number of bios with same page
140  * @bdev:       target blockdev
141  * @sector:     start sector
142  * @nr_sects:   number of sectors to write
143  * @gfp_mask:   memory allocation flags (for bio_alloc)
144  * @page:       page containing data to write
145  * @biop:       pointer to anchor bio
146  *
147  * Description:
148  *  Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
149  */
150 static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
151                 sector_t nr_sects, gfp_t gfp_mask, struct page *page,
152                 struct bio **biop)
153 {
154         struct request_queue *q = bdev_get_queue(bdev);
155         unsigned int max_write_same_sectors;
156         struct bio *bio = *biop;
157         sector_t bs_mask;
158
159         if (!q)
160                 return -ENXIO;
161
162         if (bdev_read_only(bdev))
163                 return -EPERM;
164
165         bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
166         if ((sector | nr_sects) & bs_mask)
167                 return -EINVAL;
168
169         if (!bdev_write_same(bdev))
170                 return -EOPNOTSUPP;
171
172         /* Ensure that max_write_same_sectors doesn't overflow bi_size */
173         max_write_same_sectors = UINT_MAX >> 9;
174
175         while (nr_sects) {
176                 bio = next_bio(bio, 1, gfp_mask);
177                 bio->bi_iter.bi_sector = sector;
178                 bio_set_dev(bio, bdev);
179                 bio->bi_vcnt = 1;
180                 bio->bi_io_vec->bv_page = page;
181                 bio->bi_io_vec->bv_offset = 0;
182                 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
183                 bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
184
185                 if (nr_sects > max_write_same_sectors) {
186                         bio->bi_iter.bi_size = max_write_same_sectors << 9;
187                         nr_sects -= max_write_same_sectors;
188                         sector += max_write_same_sectors;
189                 } else {
190                         bio->bi_iter.bi_size = nr_sects << 9;
191                         nr_sects = 0;
192                 }
193                 cond_resched();
194         }
195
196         *biop = bio;
197         return 0;
198 }
199
200 /**
201  * blkdev_issue_write_same - queue a write same operation
202  * @bdev:       target blockdev
203  * @sector:     start sector
204  * @nr_sects:   number of sectors to write
205  * @gfp_mask:   memory allocation flags (for bio_alloc)
206  * @page:       page containing data
207  *
208  * Description:
209  *    Issue a write same request for the sectors in question.
210  */
211 int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
212                                 sector_t nr_sects, gfp_t gfp_mask,
213                                 struct page *page)
214 {
215         struct bio *bio = NULL;
216         struct blk_plug plug;
217         int ret;
218
219         blk_start_plug(&plug);
220         ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
221                         &bio);
222         if (ret == 0 && bio) {
223                 ret = submit_bio_wait(bio);
224                 bio_put(bio);
225         }
226         blk_finish_plug(&plug);
227         return ret;
228 }
229 EXPORT_SYMBOL(blkdev_issue_write_same);
230
231 static int __blkdev_issue_write_zeroes(struct block_device *bdev,
232                 sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
233                 struct bio **biop, unsigned flags)
234 {
235         struct bio *bio = *biop;
236         unsigned int max_write_zeroes_sectors;
237         struct request_queue *q = bdev_get_queue(bdev);
238
239         if (!q)
240                 return -ENXIO;
241
242         if (bdev_read_only(bdev))
243                 return -EPERM;
244
245         /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
246         max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
247
248         if (max_write_zeroes_sectors == 0)
249                 return -EOPNOTSUPP;
250
251         while (nr_sects) {
252                 bio = next_bio(bio, 0, gfp_mask);
253                 bio->bi_iter.bi_sector = sector;
254                 bio_set_dev(bio, bdev);
255                 bio->bi_opf = REQ_OP_WRITE_ZEROES;
256                 if (flags & BLKDEV_ZERO_NOUNMAP)
257                         bio->bi_opf |= REQ_NOUNMAP;
258
259                 if (nr_sects > max_write_zeroes_sectors) {
260                         bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
261                         nr_sects -= max_write_zeroes_sectors;
262                         sector += max_write_zeroes_sectors;
263                 } else {
264                         bio->bi_iter.bi_size = nr_sects << 9;
265                         nr_sects = 0;
266                 }
267                 cond_resched();
268         }
269
270         *biop = bio;
271         return 0;
272 }
273
274 /*
275  * Convert a number of 512B sectors to a number of pages.
276  * The result is limited to a number of pages that can fit into a BIO.
277  * Also make sure that the result is always at least 1 (page) for the cases
278  * where nr_sects is lower than the number of sectors in a page.
279  */
280 static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
281 {
282         sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
283
284         return min(pages, (sector_t)BIO_MAX_PAGES);
285 }
286
287 static int __blkdev_issue_zero_pages(struct block_device *bdev,
288                 sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
289                 struct bio **biop)
290 {
291         struct request_queue *q = bdev_get_queue(bdev);
292         struct bio *bio = *biop;
293         int bi_size = 0;
294         unsigned int sz;
295
296         if (!q)
297                 return -ENXIO;
298
299         if (bdev_read_only(bdev))
300                 return -EPERM;
301
302         while (nr_sects != 0) {
303                 bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
304                                gfp_mask);
305                 bio->bi_iter.bi_sector = sector;
306                 bio_set_dev(bio, bdev);
307                 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
308
309                 while (nr_sects != 0) {
310                         sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
311                         bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
312                         nr_sects -= bi_size >> 9;
313                         sector += bi_size >> 9;
314                         if (bi_size < sz)
315                                 break;
316                 }
317                 cond_resched();
318         }
319
320         *biop = bio;
321         return 0;
322 }
323
324 /**
325  * __blkdev_issue_zeroout - generate number of zero filed write bios
326  * @bdev:       blockdev to issue
327  * @sector:     start sector
328  * @nr_sects:   number of sectors to write
329  * @gfp_mask:   memory allocation flags (for bio_alloc)
330  * @biop:       pointer to anchor bio
331  * @flags:      controls detailed behavior
332  *
333  * Description:
334  *  Zero-fill a block range, either using hardware offload or by explicitly
335  *  writing zeroes to the device.
336  *
337  *  If a device is using logical block provisioning, the underlying space will
338  *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
339  *
340  *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
341  *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
342  */
343 int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
344                 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
345                 unsigned flags)
346 {
347         int ret;
348         sector_t bs_mask;
349
350         bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
351         if ((sector | nr_sects) & bs_mask)
352                 return -EINVAL;
353
354         ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
355                         biop, flags);
356         if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
357                 return ret;
358
359         return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
360                                          biop);
361 }
362 EXPORT_SYMBOL(__blkdev_issue_zeroout);
363
364 /**
365  * blkdev_issue_zeroout - zero-fill a block range
366  * @bdev:       blockdev to write
367  * @sector:     start sector
368  * @nr_sects:   number of sectors to write
369  * @gfp_mask:   memory allocation flags (for bio_alloc)
370  * @flags:      controls detailed behavior
371  *
372  * Description:
373  *  Zero-fill a block range, either using hardware offload or by explicitly
374  *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
375  *  valid values for %flags.
376  */
377 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
378                 sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
379 {
380         int ret = 0;
381         sector_t bs_mask;
382         struct bio *bio;
383         struct blk_plug plug;
384         bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
385
386         bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
387         if ((sector | nr_sects) & bs_mask)
388                 return -EINVAL;
389
390 retry:
391         bio = NULL;
392         blk_start_plug(&plug);
393         if (try_write_zeroes) {
394                 ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
395                                                   gfp_mask, &bio, flags);
396         } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
397                 ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
398                                                 gfp_mask, &bio);
399         } else {
400                 /* No zeroing offload support */
401                 ret = -EOPNOTSUPP;
402         }
403         if (ret == 0 && bio) {
404                 ret = submit_bio_wait(bio);
405                 bio_put(bio);
406         }
407         blk_finish_plug(&plug);
408         if (ret && try_write_zeroes) {
409                 if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
410                         try_write_zeroes = false;
411                         goto retry;
412                 }
413                 if (!bdev_write_zeroes_sectors(bdev)) {
414                         /*
415                          * Zeroing offload support was indicated, but the
416                          * device reported ILLEGAL REQUEST (for some devices
417                          * there is no non-destructive way to verify whether
418                          * WRITE ZEROES is actually supported).
419                          */
420                         ret = -EOPNOTSUPP;
421                 }
422         }
423
424         return ret;
425 }
426 EXPORT_SYMBOL(blkdev_issue_zeroout);