8b1f4f04210a50fe0d8ec7383239f38f3b5bbae0
[sfrench/samba-autobuild/.git] / ctdb / server / ipalloc.c
1 /*
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <talloc.h>
23
24 #include "replace.h"
25 #include "system/network.h"
26
27 #include "lib/util/debug.h"
28
29 #include "common/logging.h"
30 #include "common/rb_tree.h"
31
32 #include "protocol/protocol_api.h"
33
34 #include "server/ipalloc_private.h"
35
36 /* Initialise main ipalloc state and sub-structures */
37 struct ipalloc_state *
38 ipalloc_state_init(TALLOC_CTX *mem_ctx,
39                    uint32_t num_nodes,
40                    enum ipalloc_algorithm algorithm,
41                    bool no_ip_takeover,
42                    bool no_ip_failback,
43                    bool no_ip_host_on_all_disabled,
44                    uint32_t *force_rebalance_nodes)
45 {
46         struct ipalloc_state *ipalloc_state =
47                 talloc_zero(mem_ctx, struct ipalloc_state);
48         if (ipalloc_state == NULL) {
49                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
50                 return NULL;
51         }
52
53         ipalloc_state->num = num_nodes;
54
55         ipalloc_state->noiphost =
56                 talloc_zero_array(ipalloc_state,
57                                   bool,
58                                   ipalloc_state->num);
59         if (ipalloc_state->noiphost == NULL) {
60                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
61                 goto fail;
62         }
63
64         ipalloc_state->algorithm = algorithm;
65         ipalloc_state->no_ip_takeover = no_ip_takeover;
66         ipalloc_state->no_ip_failback = no_ip_failback;
67         ipalloc_state->no_ip_host_on_all_disabled = no_ip_host_on_all_disabled;
68         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
69
70         return ipalloc_state;
71 fail:
72         talloc_free(ipalloc_state);
73         return NULL;
74 }
75
76 static void *add_ip_callback(void *parm, void *data)
77 {
78         struct public_ip_list *this_ip = parm;
79         struct public_ip_list *prev_ip = data;
80
81         if (prev_ip == NULL) {
82                 return parm;
83         }
84         if (this_ip->pnn == -1) {
85                 this_ip->pnn = prev_ip->pnn;
86         }
87
88         return parm;
89 }
90
91 static int getips_count_callback(void *param, void *data)
92 {
93         struct public_ip_list **ip_list = (struct public_ip_list **)param;
94         struct public_ip_list *new_ip = (struct public_ip_list *)data;
95
96         new_ip->next = *ip_list;
97         *ip_list     = new_ip;
98         return 0;
99 }
100
101 /* Nodes only know about those public addresses that they are
102  * configured to serve and no individual node has a full list of all
103  * public addresses configured across the cluster.  Therefore, a
104  * merged list of all public addresses needs to be built so that IP
105  * allocation can be done. */
106 static struct public_ip_list *
107 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
108 {
109         int i, j;
110         struct public_ip_list *ip_list;
111         struct ctdb_public_ip_list *public_ips;
112         struct trbt_tree *ip_tree;
113
114         ip_tree = trbt_create(ipalloc_state, 0);
115
116         if (ipalloc_state->known_public_ips == NULL) {
117                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
118                 return NULL;
119         }
120
121         for (i=0; i < ipalloc_state->num; i++) {
122
123                 public_ips = &ipalloc_state->known_public_ips[i];
124
125                 for (j=0; j < public_ips->num; j++) {
126                         struct public_ip_list *tmp_ip;
127
128                         /* This is returned as part of ip_list */
129                         tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
130                         if (tmp_ip == NULL) {
131                                 DEBUG(DEBUG_ERR,
132                                       (__location__ " out of memory\n"));
133                                 talloc_free(ip_tree);
134                                 return NULL;
135                         }
136
137                         /* Do not use information about IP addresses hosted
138                          * on other nodes, it may not be accurate */
139                         if (public_ips->ip[j].pnn == i) {
140                                 tmp_ip->pnn = public_ips->ip[j].pnn;
141                         } else {
142                                 tmp_ip->pnn = -1;
143                         }
144                         tmp_ip->addr = public_ips->ip[j].addr;
145                         tmp_ip->next = NULL;
146
147                         trbt_insertarray32_callback(ip_tree,
148                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
149                                 add_ip_callback,
150                                 tmp_ip);
151                 }
152         }
153
154         ip_list = NULL;
155         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
156         talloc_free(ip_tree);
157
158         return ip_list;
159 }
160
161 static bool populate_bitmap(struct ipalloc_state *ipalloc_state)
162 {
163         struct public_ip_list *ip = NULL;
164         int i, j;
165
166         for (ip = ipalloc_state->all_ips; ip != NULL; ip = ip->next) {
167
168                 ip->available_on = talloc_zero_array(ip, bool,
169                                                      ipalloc_state->num);
170                 if (ip->available_on == NULL) {
171                         return false;
172                 }
173
174                 for (i = 0; i < ipalloc_state->num; i++) {
175                         struct ctdb_public_ip_list *avail =
176                                 &ipalloc_state->available_public_ips[i];
177
178                         /* Check to see if "ip" is available on node "i" */
179                         for (j = 0; j < avail->num; j++) {
180                                 if (ctdb_sock_addr_same_ip(
181                                             &ip->addr, &avail->ip[j].addr)) {
182                                         ip->available_on[i] = true;
183                                         break;
184                                 }
185                         }
186                 }
187         }
188
189         return true;
190 }
191
192 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
193 {
194         int i;
195
196         for (i=0;i<nodemap->num;i++) {
197                 if (!(nodemap->node[i].flags &
198                       (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
199                         /* Found one completely healthy node */
200                         return false;
201                 }
202         }
203
204         return true;
205 }
206
207 /* Set internal flags for IP allocation:
208  *   Clear ip flags
209  *   Set NOIPHOST ip flag for each INACTIVE node
210  *   if all nodes are disabled:
211  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
212  *   else
213  *     Set NOIPHOST ip flags for disabled nodes
214  */
215 void ipalloc_set_node_flags(struct ipalloc_state *ipalloc_state,
216                             struct ctdb_node_map *nodemap)
217 {
218         int i;
219         bool all_disabled = all_nodes_are_disabled(nodemap);
220
221         for (i=0;i<nodemap->num;i++) {
222                 /* Can not host IPs on INACTIVE node */
223                 if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
224                         ipalloc_state->noiphost[i] = true;
225                 }
226
227                 /* If node is disabled then it can only host IPs if
228                  * all nodes are disabled and NoIPHostOnAllDisabled is
229                  * unset
230                  */
231                 if (nodemap->node[i].flags & NODE_FLAGS_DISABLED) {
232                         if (!(all_disabled &&
233                               ipalloc_state->no_ip_host_on_all_disabled == 0)) {
234
235                                 ipalloc_state->noiphost[i] = true;
236                         }
237                 }
238         }
239 }
240
241 void ipalloc_set_public_ips(struct ipalloc_state *ipalloc_state,
242                             struct ctdb_public_ip_list *known_ips,
243                             struct ctdb_public_ip_list *available_ips)
244 {
245         ipalloc_state->available_public_ips = available_ips;
246         ipalloc_state->known_public_ips = known_ips;
247 }
248
249 /* This can only return false if there are no available IPs *and*
250  * there are no IP addresses currently allocated.  If the latter is
251  * true then the cluster can clearly host IPs... just not necessarily
252  * right now... */
253 bool ipalloc_can_host_ips(struct ipalloc_state *ipalloc_state)
254 {
255         int i;
256         bool have_ips = false;
257
258         for (i=0; i < ipalloc_state->num; i++) {
259                 struct ctdb_public_ip_list *ips =
260                         ipalloc_state->known_public_ips;
261                 if (ips[i].num != 0) {
262                         int j;
263                         have_ips = true;
264                         /* Succeed if an address is hosted on node i */
265                         for (j=0; j < ips[i].num; j++) {
266                                 if (ips[i].ip[j].pnn == i) {
267                                         return true;
268                                 }
269                         }
270                 }
271         }
272
273         if (! have_ips) {
274                 return false;
275         }
276
277         /* At this point there are known addresses but none are
278          * hosted.  Need to check if cluster can now host some
279          * addresses.
280          */
281         for (i=0; i < ipalloc_state->num; i++) {
282                 if (ipalloc_state->available_public_ips[i].num != 0) {
283                         return true;
284                 }
285         }
286
287         return false;
288 }
289
290 /* The calculation part of the IP allocation algorithm. */
291 struct public_ip_list *ipalloc(struct ipalloc_state *ipalloc_state)
292 {
293         bool ret = false;
294
295         ipalloc_state->all_ips = create_merged_ip_list(ipalloc_state);
296         if (ipalloc_state->all_ips == NULL) {
297                 return NULL;
298         }
299
300         if (!populate_bitmap(ipalloc_state)) {
301                 return NULL;
302         }
303
304         switch (ipalloc_state->algorithm) {
305         case IPALLOC_LCP2:
306                 ret = ipalloc_lcp2(ipalloc_state);
307                 break;
308         case IPALLOC_DETERMINISTIC:
309                 ret = ipalloc_deterministic(ipalloc_state);
310                 break;
311         case IPALLOC_NONDETERMINISTIC:
312                 ret = ipalloc_nondeterministic(ipalloc_state);
313                break;
314         }
315
316         /* at this point ->pnn is the node which will own each IP
317            or -1 if there is no node that can cover this ip
318         */
319
320         return (ret ? ipalloc_state->all_ips : NULL);
321 }