md: document lifetime of internal rdev pointer.
[sfrench/cifs-2.6.git] / drivers / md / raid10.h
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _RAID10_H
3 #define _RAID10_H
4
5 /* Note: raid10_info.rdev can be set to NULL asynchronously by
6  * raid10_remove_disk.
7  * There are three safe ways to access raid10_info.rdev.
8  * 1/ when holding mddev->reconfig_mutex
9  * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
10  *    that is called as part of performing resync/recovery/reshape.
11  * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
12  *    and if it is non-NULL, increment rdev->nr_pending before dropping the
13  *    RCU lock.
14  * When .rdev is set to NULL, the nr_pending count is checked again and if it
15  * has been incremented, the pointer is put back in .rdev.
16  */
17
18 struct raid10_info {
19         struct md_rdev  *rdev, *replacement;
20         sector_t        head_position;
21         int             recovery_disabled;      /* matches
22                                                  * mddev->recovery_disabled
23                                                  * when we shouldn't try
24                                                  * recovering this device.
25                                                  */
26 };
27
28 struct r10conf {
29         struct mddev            *mddev;
30         struct raid10_info      *mirrors;
31         struct raid10_info      *mirrors_new, *mirrors_old;
32         spinlock_t              device_lock;
33
34         /* geometry */
35         struct geom {
36                 int             raid_disks;
37                 int             near_copies;  /* number of copies laid out
38                                                * raid0 style */
39                 int             far_copies;   /* number of copies laid out
40                                                * at large strides across drives
41                                                */
42                 int             far_offset;   /* far_copies are offset by 1
43                                                * stripe instead of many
44                                                */
45                 sector_t        stride;       /* distance between far copies.
46                                                * This is size / far_copies unless
47                                                * far_offset, in which case it is
48                                                * 1 stripe.
49                                                */
50                 int             far_set_size; /* The number of devices in a set,
51                                                * where a 'set' are devices that
52                                                * contain far/offset copies of
53                                                * each other.
54                                                */
55                 int             chunk_shift; /* shift from chunks to sectors */
56                 sector_t        chunk_mask;
57         } prev, geo;
58         int                     copies;       /* near_copies * far_copies.
59                                                * must be <= raid_disks
60                                                */
61
62         sector_t                dev_sectors;  /* temp copy of
63                                                * mddev->dev_sectors */
64         sector_t                reshape_progress;
65         sector_t                reshape_safe;
66         unsigned long           reshape_checkpoint;
67         sector_t                offset_diff;
68
69         struct list_head        retry_list;
70         /* A separate list of r1bio which just need raid_end_bio_io called.
71          * This mustn't happen for writes which had any errors if the superblock
72          * needs to be written.
73          */
74         struct list_head        bio_end_io_list;
75
76         /* queue pending writes and submit them on unplug */
77         struct bio_list         pending_bio_list;
78         int                     pending_count;
79
80         spinlock_t              resync_lock;
81         atomic_t                nr_pending;
82         int                     nr_waiting;
83         int                     nr_queued;
84         int                     barrier;
85         int                     array_freeze_pending;
86         sector_t                next_resync;
87         int                     fullsync;  /* set to 1 if a full sync is needed,
88                                             * (fresh device added).
89                                             * Cleared when a sync completes.
90                                             */
91         int                     have_replacement; /* There is at least one
92                                                    * replacement device.
93                                                    */
94         wait_queue_head_t       wait_barrier;
95
96         mempool_t               *r10bio_pool;
97         mempool_t               *r10buf_pool;
98         struct page             *tmppage;
99         struct bio_set          *bio_split;
100
101         /* When taking over an array from a different personality, we store
102          * the new thread here until we fully activate the array.
103          */
104         struct md_thread        *thread;
105
106         /*
107          * Keep track of cluster resync window to send to other nodes.
108          */
109         sector_t                cluster_sync_low;
110         sector_t                cluster_sync_high;
111 };
112
113 /*
114  * this is our 'private' RAID10 bio.
115  *
116  * it contains information about what kind of IO operations were started
117  * for this RAID10 operation, and about their status:
118  */
119
120 struct r10bio {
121         atomic_t                remaining; /* 'have we finished' count,
122                                             * used from IRQ handlers
123                                             */
124         sector_t                sector; /* virtual sector number */
125         int                     sectors;
126         unsigned long           state;
127         struct mddev            *mddev;
128         /*
129          * original bio going to /dev/mdx
130          */
131         struct bio              *master_bio;
132         /*
133          * if the IO is in READ direction, then this is where we read
134          */
135         int                     read_slot;
136
137         struct list_head        retry_list;
138         /*
139          * if the IO is in WRITE direction, then multiple bios are used,
140          * one for each copy.
141          * When resyncing we also use one for each copy.
142          * When reconstructing, we use 2 bios, one for read, one for write.
143          * We choose the number when they are allocated.
144          * We sometimes need an extra bio to write to the replacement.
145          */
146         struct r10dev {
147                 struct bio      *bio;
148                 union {
149                         struct bio      *repl_bio; /* used for resync and
150                                                     * writes */
151                         struct md_rdev  *rdev;     /* used for reads
152                                                     * (read_slot >= 0) */
153                 };
154                 sector_t        addr;
155                 int             devnum;
156         } devs[0];
157 };
158
159 /* bits for r10bio.state */
/*
 * Flag numbers stored in r10bio.state.  The values are spelled out
 * explicitly; they equal the implicit numbering of the enumerators.
 */
enum r10bio_state {
	R10BIO_Uptodate		= 0,
	R10BIO_IsSync		= 1,
	R10BIO_IsRecover	= 2,
	R10BIO_IsReshape	= 3,
	R10BIO_Degraded		= 4,
	/*
	 * Set ReadError on bios that experience a read error
	 * so that raid10d knows what to do with them.
	 */
	R10BIO_ReadError	= 5,
	/*
	 * If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R10BIO_MadeGood		= 6,
	R10BIO_WriteError	= 7,
	/*
	 * During a reshape we might be performing IO on the
	 * 'previous' part of the array, in which case this
	 * flag is set.
	 */
	R10BIO_Previous		= 8,
	/* failfast devices did receive failfast requests. */
	R10BIO_FailFast		= 9,
};
183 #endif