caa50224156eb7a1b8eea5c6e03102c6a7d6fee8
[vlendec/samba-autobuild/.git] / ctdb / server / ipalloc.c
1 /*
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <talloc.h>
23
24 #include "replace.h"
25 #include "system/network.h"
26
27 #include "lib/util/debug.h"
28
29 #include "common/logging.h"
30 #include "common/rb_tree.h"
31
32 #include "protocol/protocol_util.h"
33
34 #include "server/ipalloc_private.h"
35
36 /* Initialise main ipalloc state and sub-structures */
37 struct ipalloc_state *
38 ipalloc_state_init(TALLOC_CTX *mem_ctx,
39                    uint32_t num_nodes,
40                    enum ipalloc_algorithm algorithm,
41                    bool no_ip_takeover,
42                    bool no_ip_failback,
43                    bool no_ip_host_on_all_disabled,
44                    uint32_t *force_rebalance_nodes)
45 {
46         struct ipalloc_state *ipalloc_state =
47                 talloc_zero(mem_ctx, struct ipalloc_state);
48         if (ipalloc_state == NULL) {
49                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
50                 return NULL;
51         }
52
53         ipalloc_state->num = num_nodes;
54
55         ipalloc_state->noiphost = bitmap_talloc(ipalloc_state,
56                                                 ipalloc_state->num);
57         if (ipalloc_state->noiphost == NULL) {
58                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
59                 goto fail;
60         }
61
62         ipalloc_state->algorithm = algorithm;
63         ipalloc_state->no_ip_takeover = no_ip_takeover;
64         ipalloc_state->no_ip_failback = no_ip_failback;
65         ipalloc_state->no_ip_host_on_all_disabled = no_ip_host_on_all_disabled;
66         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
67
68         return ipalloc_state;
69 fail:
70         talloc_free(ipalloc_state);
71         return NULL;
72 }
73
74 static void *add_ip_callback(void *parm, void *data)
75 {
76         struct public_ip_list *this_ip = parm;
77         struct public_ip_list *prev_ip = data;
78
79         if (prev_ip == NULL) {
80                 return parm;
81         }
82         if (this_ip->pnn == -1) {
83                 this_ip->pnn = prev_ip->pnn;
84         }
85
86         return parm;
87 }
88
89 static int getips_count_callback(void *param, void *data)
90 {
91         struct public_ip_list **ip_list = (struct public_ip_list **)param;
92         struct public_ip_list *new_ip = (struct public_ip_list *)data;
93
94         new_ip->next = *ip_list;
95         *ip_list     = new_ip;
96         return 0;
97 }
98
99 /* Nodes only know about those public addresses that they are
100  * configured to serve and no individual node has a full list of all
101  * public addresses configured across the cluster.  Therefore, a
102  * merged list of all public addresses needs to be built so that IP
103  * allocation can be done. */
104 static struct public_ip_list *
105 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
106 {
107         int i, j;
108         struct public_ip_list *ip_list;
109         struct ctdb_public_ip_list *public_ips;
110         struct trbt_tree *ip_tree;
111
112         ip_tree = trbt_create(ipalloc_state, 0);
113
114         if (ipalloc_state->known_public_ips == NULL) {
115                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
116                 return NULL;
117         }
118
119         for (i=0; i < ipalloc_state->num; i++) {
120
121                 public_ips = &ipalloc_state->known_public_ips[i];
122
123                 for (j=0; j < public_ips->num; j++) {
124                         struct public_ip_list *tmp_ip;
125
126                         /* This is returned as part of ip_list */
127                         tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
128                         if (tmp_ip == NULL) {
129                                 DEBUG(DEBUG_ERR,
130                                       (__location__ " out of memory\n"));
131                                 talloc_free(ip_tree);
132                                 return NULL;
133                         }
134
135                         /* Do not use information about IP addresses hosted
136                          * on other nodes, it may not be accurate */
137                         if (public_ips->ip[j].pnn == i) {
138                                 tmp_ip->pnn = public_ips->ip[j].pnn;
139                         } else {
140                                 tmp_ip->pnn = -1;
141                         }
142                         tmp_ip->addr = public_ips->ip[j].addr;
143                         tmp_ip->next = NULL;
144
145                         trbt_insertarray32_callback(ip_tree,
146                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
147                                 add_ip_callback,
148                                 tmp_ip);
149                 }
150         }
151
152         ip_list = NULL;
153         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
154         talloc_free(ip_tree);
155
156         return ip_list;
157 }
158
159 static bool populate_bitmap(struct ipalloc_state *ipalloc_state)
160 {
161         struct public_ip_list *ip = NULL;
162         int i, j;
163
164         for (ip = ipalloc_state->all_ips; ip != NULL; ip = ip->next) {
165
166                 ip->known_on = bitmap_talloc(ip, ipalloc_state->num);
167                 if (ip->known_on == NULL) {
168                         return false;
169                 }
170
171                 ip->available_on = bitmap_talloc(ip, ipalloc_state->num);
172                 if (ip->available_on == NULL) {
173                         return false;
174                 }
175
176                 for (i = 0; i < ipalloc_state->num; i++) {
177                         struct ctdb_public_ip_list *known =
178                                 &ipalloc_state->known_public_ips[i];
179                         struct ctdb_public_ip_list *avail =
180                                 &ipalloc_state->available_public_ips[i];
181
182                         /* Check to see if "ip" is available on node "i" */
183                         for (j = 0; j < avail->num; j++) {
184                                 if (ctdb_sock_addr_same_ip(
185                                             &ip->addr, &avail->ip[j].addr)) {
186                                         bitmap_set(ip->available_on, i);
187                                         break;
188                                 }
189                         }
190
191                         /* Optimisation: available => known */
192                         if (bitmap_query(ip->available_on, i)) {
193                                 bitmap_set(ip->known_on, i);
194                                 continue;
195                         }
196
197                         /* Check to see if "ip" is known on node "i" */
198                         for (j = 0; j < known->num; j++) {
199                                 if (ctdb_sock_addr_same_ip(
200                                             &ip->addr, &known->ip[j].addr)) {
201                                         bitmap_set(ip->known_on, i);
202                                         break;
203                                 }
204                         }
205                 }
206         }
207
208         return true;
209 }
210
211 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
212 {
213         int i;
214
215         for (i=0;i<nodemap->num;i++) {
216                 if (!(nodemap->node[i].flags &
217                       (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
218                         /* Found one completely healthy node */
219                         return false;
220                 }
221         }
222
223         return true;
224 }
225
226 /* Set internal flags for IP allocation:
227  *   Clear ip flags
228  *   Set NOIPHOST ip flag for each INACTIVE node
229  *   if all nodes are disabled:
230  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
231  *   else
232  *     Set NOIPHOST ip flags for disabled nodes
233  */
234 void ipalloc_set_node_flags(struct ipalloc_state *ipalloc_state,
235                             struct ctdb_node_map *nodemap)
236 {
237         int i;
238         bool all_disabled = all_nodes_are_disabled(nodemap);
239
240         for (i=0;i<nodemap->num;i++) {
241                 /* Can not host IPs on INACTIVE node */
242                 if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
243                         bitmap_set(ipalloc_state->noiphost, i);
244                 }
245
246                 /* If node is disabled then it can only host IPs if
247                  * all nodes are disabled and NoIPHostOnAllDisabled is
248                  * unset
249                  */
250                 if (nodemap->node[i].flags & NODE_FLAGS_DISABLED) {
251                         if (!(all_disabled &&
252                               ipalloc_state->no_ip_host_on_all_disabled == 0)) {
253
254                                 bitmap_set(ipalloc_state->noiphost, i);
255                         }
256                 }
257         }
258 }
259
260 void ipalloc_set_public_ips(struct ipalloc_state *ipalloc_state,
261                             struct ctdb_public_ip_list *known_ips,
262                             struct ctdb_public_ip_list *available_ips)
263 {
264         ipalloc_state->available_public_ips = available_ips;
265         ipalloc_state->known_public_ips = known_ips;
266 }
267
268 /* This can only return false if there are no available IPs *and*
269  * there are no IP addresses currently allocated.  If the latter is
270  * true then the cluster can clearly host IPs... just not necessarily
271  * right now... */
272 bool ipalloc_can_host_ips(struct ipalloc_state *ipalloc_state)
273 {
274         int i;
275         bool have_ips = false;
276
277         for (i=0; i < ipalloc_state->num; i++) {
278                 struct ctdb_public_ip_list *ips =
279                         ipalloc_state->known_public_ips;
280                 if (ips[i].num != 0) {
281                         int j;
282                         have_ips = true;
283                         /* Succeed if an address is hosted on node i */
284                         for (j=0; j < ips[i].num; j++) {
285                                 if (ips[i].ip[j].pnn == i) {
286                                         return true;
287                                 }
288                         }
289                 }
290         }
291
292         if (! have_ips) {
293                 return false;
294         }
295
296         /* At this point there are known addresses but none are
297          * hosted.  Need to check if cluster can now host some
298          * addresses.
299          */
300         for (i=0; i < ipalloc_state->num; i++) {
301                 if (ipalloc_state->available_public_ips[i].num != 0) {
302                         return true;
303                 }
304         }
305
306         return false;
307 }
308
309 /* The calculation part of the IP allocation algorithm. */
310 struct public_ip_list *ipalloc(struct ipalloc_state *ipalloc_state)
311 {
312         bool ret = false;
313
314         ipalloc_state->all_ips = create_merged_ip_list(ipalloc_state);
315         if (ipalloc_state->all_ips == NULL) {
316                 return NULL;
317         }
318
319         if (!populate_bitmap(ipalloc_state)) {
320                 return NULL;
321         }
322
323         switch (ipalloc_state->algorithm) {
324         case IPALLOC_LCP2:
325                 ret = ipalloc_lcp2(ipalloc_state);
326                 break;
327         case IPALLOC_DETERMINISTIC:
328                 ret = ipalloc_deterministic(ipalloc_state);
329                 break;
330         case IPALLOC_NONDETERMINISTIC:
331                 ret = ipalloc_nondeterministic(ipalloc_state);
332                break;
333         }
334
335         /* at this point ->pnn is the node which will own each IP
336            or -1 if there is no node that can cover this ip
337         */
338
339         return (ret ? ipalloc_state->all_ips : NULL);
340 }