4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 #include "system/network.h"
27 #include "lib/util/debug.h"
29 #include "common/logging.h"
30 #include "common/rb_tree.h"
32 #include "protocol/protocol_util.h"
34 #include "server/ipalloc_private.h"
36 /* Initialise main ipalloc state and sub-structures */
/*
 * Allocates a zeroed struct ipalloc_state with talloc_zero() on mem_ctx,
 * stores the node count in ->num, allocates the ->noiphost bitmap via
 * bitmap_talloc() with the new state as its first argument, and then copies
 * the algorithm selection and the option flags into the state.
 *
 * NOTE(review): this excerpt has gaps.  num_nodes, no_ip_takeover and
 * no_ip_failback are used below but their parameter declarations are not
 * visible; neither are the second argument of bitmap_talloc(), the return
 * statements, or whatever leads to the talloc_free() at the end.  Presumably
 * both out-of-memory branches end up returning NULL - confirm against the
 * complete file.
 */
37 struct ipalloc_state *
38 ipalloc_state_init(TALLOC_CTX *mem_ctx,
40 enum ipalloc_algorithm algorithm,
43 bool no_ip_host_on_all_disabled,
44 uint32_t *force_rebalance_nodes)
46 struct ipalloc_state *ipalloc_state =
47 talloc_zero(mem_ctx, struct ipalloc_state);
/* Allocation of the state itself failed: log it. */
48 if (ipalloc_state == NULL) {
49 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
53 ipalloc_state->num = num_nodes;
/* This is the bitmap that ipalloc_set_node_flags() sets bits in, one
 * bit index per node. */
55 ipalloc_state->noiphost = bitmap_talloc(ipalloc_state,
57 if (ipalloc_state->noiphost == NULL) {
58 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
/* Record the caller's settings as passed.  force_rebalance_nodes is stored
 * as a pointer only; nothing here copies what it points to. */
62 ipalloc_state->algorithm = algorithm;
63 ipalloc_state->no_ip_takeover = no_ip_takeover;
64 ipalloc_state->no_ip_failback = no_ip_failback;
65 ipalloc_state->no_ip_host_on_all_disabled = no_ip_host_on_all_disabled;
66 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
/* NOTE(review): presumably the failure-only cleanup path, reached via a
 * label that is not visible here.  Freeing the state should also release
 * anything allocated with it as talloc parent - confirm. */
70 talloc_free(ipalloc_state);
/*
 * Callback taking two struct public_ip_list pointers: parm is the entry
 * being added (this_ip) and data is an earlier entry (prev_ip), which may be
 * NULL.  When an earlier entry exists and the new entry has pnn == -1, the
 * new entry takes over the earlier entry's pnn.
 *
 * NOTE(review): presumably this is the callback handed to
 * trbt_insertarray32_callback() in create_merged_ip_list(), with data being
 * the entry already stored under the same key - the argument lines of that
 * call are not visible.  The body of the NULL branch and the return value
 * are not visible in this excerpt either.
 */
74 static void *add_ip_callback(void *parm, void *data)
76 struct public_ip_list *this_ip = parm;
77 struct public_ip_list *prev_ip = data;
/* No earlier entry, so there is no pnn to inherit. */
79 if (prev_ip == NULL) {
/* New entry carries no owner of its own: keep the earlier entry's one. */
82 if (this_ip->pnn == -1) {
83 this_ip->pnn = prev_ip->pnn;
/*
 * Tree traversal callback used by create_merged_ip_list().  param is the
 * address of a list head pointer and data is the struct public_ip_list
 * stored in the tree; the entry's next pointer is set to the current head.
 *
 * NOTE(review): the statement that makes the entry the new head and the
 * return value are not visible in this excerpt - presumably the head is
 * updated so that entries are prepended one by one; confirm.
 */
89 static int getips_count_callback(void *param, void *data)
91 struct public_ip_list **ip_list = (struct public_ip_list **)param;
92 struct public_ip_list *new_ip = (struct public_ip_list *)data;
/* Link the entry in front of whatever the list currently starts with. */
94 new_ip->next = *ip_list;
99 /* Nodes only know about those public addresses that they are
100 * configured to serve and no individual node has a full list of all
101 * public addresses configured across the cluster. Therefore, a
102 * merged list of all public addresses needs to be built so that IP
103 * allocation can be done. */
/*
 * Implementation outline: for each node i (0 .. ipalloc_state->num - 1) and
 * each address in known_public_ips[i], a zeroed struct public_ip_list is
 * allocated on ipalloc_state and inserted into a temporary tree keyed by
 * ip_key() of the address.  The tree is then traversed with
 * getips_count_callback() to chain the entries into a linked list, and the
 * tree is freed.
 *
 * NOTE(review): this excerpt has gaps.  The declarations of i and j, the
 * return statements, the initialisation of ip_list before the traversal,
 * the else-side of the pnn test and the trailing arguments of the insert
 * call are not visible.
 */
104 static struct public_ip_list *
105 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
108 struct public_ip_list *ip_list;
109 struct ctdb_public_ip_list *public_ips;
110 struct trbt_tree *ip_tree;
/* Temporary tree, allocated with ipalloc_state as its first argument. */
112 ip_tree = trbt_create(ipalloc_state, 0);
/* NOTE(review): ip_tree already exists at this point and no talloc_free()
 * of it is visible in this branch, unlike the out-of-memory branch further
 * down.  If this branch returns, the tree presumably stays allocated until
 * ipalloc_state is freed - confirm, and consider doing this check before
 * trbt_create().  The result of trbt_create() is also not checked in the
 * visible lines. */
114 if (ipalloc_state->known_public_ips == NULL) {
115 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
/* One known-address list per node. */
119 for (i=0; i < ipalloc_state->num; i++) {
121 public_ips = &ipalloc_state->known_public_ips[i];
123 for (j=0; j < public_ips->num; j++) {
124 struct public_ip_list *tmp_ip;
126 /* This is returned as part of ip_list */
127 tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
128 if (tmp_ip == NULL) {
130 (__location__ " out of memory\n"));
131 talloc_free(ip_tree);
135 /* Do not use information about IP addresses hosted
136 * on other nodes, it may not be accurate */
137 if (public_ips->ip[j].pnn == i) {
138 tmp_ip->pnn = public_ips->ip[j].pnn;
142 tmp_ip->addr = public_ips->ip[j].addr;
/* Insert under a key derived from the address.  NOTE(review): presumably
 * add_ip_callback and tmp_ip are the remaining arguments, so that the same
 * address reported by several nodes ends up as one entry - not visible. */
145 trbt_insertarray32_callback(ip_tree,
146 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
/* Chain every tree entry into the list headed by ip_list, then drop the
 * tree.  The entries were allocated on ipalloc_state rather than on the
 * tree, so presumably they remain valid after the tree is freed. */
153 trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
154 talloc_free(ip_tree);
/*
 * For every address in ipalloc_state->all_ips, allocates two bitmaps on the
 * address entry (both created from ipalloc_state->num) and fills them per
 * node i:
 *   available_on - bit i set when the address matches an entry of
 *                  available_public_ips[i];
 *   known_on     - bit i set when the address is available on node i or
 *                  matches an entry of known_public_ips[i].
 * Addresses are compared with ctdb_sock_addr_same_ip().
 *
 * NOTE(review): the declarations of i and j, the return statements for the
 * allocation-failure and success cases, and any break/continue following
 * the bitmap_set() calls are not visible in this excerpt.
 */
159 static bool populate_bitmap(struct ipalloc_state *ipalloc_state)
161 struct public_ip_list *ip = NULL;
164 for (ip = ipalloc_state->all_ips; ip != NULL; ip = ip->next) {
/* Both bitmaps use the address entry itself as the first argument of
 * bitmap_talloc(). */
166 ip->known_on = bitmap_talloc(ip, ipalloc_state->num);
167 if (ip->known_on == NULL) {
171 ip->available_on = bitmap_talloc(ip, ipalloc_state->num);
172 if (ip->available_on == NULL) {
176 for (i = 0; i < ipalloc_state->num; i++) {
/* Node i's two address lists. */
177 struct ctdb_public_ip_list *known =
178 &ipalloc_state->known_public_ips[i];
179 struct ctdb_public_ip_list *avail =
180 &ipalloc_state->available_public_ips[i];
182 /* Check to see if "ip" is available on node "i" */
183 for (j = 0; j < avail->num; j++) {
184 if (ctdb_sock_addr_same_ip(
185 &ip->addr, &avail->ip[j].addr)) {
186 bitmap_set(ip->available_on, i);
191 /* Optimisation: available => known */
192 if (bitmap_query(ip->available_on, i)) {
193 bitmap_set(ip->known_on, i);
197 /* Check to see if "ip" is known on node "i" */
198 for (j = 0; j < known->num; j++) {
199 if (ctdb_sock_addr_same_ip(
200 &ip->addr, &known->ip[j].addr)) {
201 bitmap_set(ip->known_on, i);
/*
 * Scans every node in nodemap looking for one that has neither
 * NODE_FLAGS_INACTIVE nor NODE_FLAGS_DISABLED set.
 *
 * NOTE(review): the declaration of i and both return statements are not
 * visible in this excerpt.  Going by the function name, finding such a node
 * presumably yields false and finishing the loop yields true - confirm.
 */
211 static bool all_nodes_are_disabled(struct ctdb_node_map *nodemap)
215 for (i=0;i<nodemap->num;i++) {
/* True when none of the two flag bits is set on node i. */
216 if (!(nodemap->node[i].flags &
217 (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
218 /* Found one completely healthy node */
/*
 * ipalloc_set_node_flags(): walks nodemap and sets bit i of
 * ipalloc_state->noiphost for every node that has NODE_FLAGS_INACTIVE set.
 * A node that has NODE_FLAGS_DISABLED set also gets its bit set, except when
 * all_nodes_are_disabled() returned true for this nodemap and
 * ipalloc_state->no_ip_host_on_all_disabled is 0.
 *
 * NOTE(review): this excerpt has gaps.  The declaration of i is not
 * visible, and neither of the two multi-line comments below (the one
 * directly following this note and the one starting "If node is disabled")
 * has a visible terminator.  As shown, each of them runs on to the next
 * terminator further down and hides the code in between; restore the
 * missing comment endings before building.
 */
226 /* Set internal flags for IP allocation:
228 * Set NOIPHOST ip flag for each INACTIVE node
229 * if all nodes are disabled:
230 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
232 * Set NOIPHOST ip flags for disabled nodes
234 void ipalloc_set_node_flags(struct ipalloc_state *ipalloc_state,
235 struct ctdb_node_map *nodemap)
238 bool all_disabled = all_nodes_are_disabled(nodemap);
240 for (i=0;i<nodemap->num;i++) {
241 /* Can not host IPs on INACTIVE node */
242 if (nodemap->node[i].flags & NODE_FLAGS_INACTIVE) {
243 bitmap_set(ipalloc_state->noiphost, i);
246 /* If node is disabled then it can only host IPs if
247 * all nodes are disabled and NoIPHostOnAllDisabled is
250 if (nodemap->node[i].flags & NODE_FLAGS_DISABLED) {
251 if (!(all_disabled &&
252 ipalloc_state->no_ip_host_on_all_disabled == 0)) {
254 bitmap_set(ipalloc_state->noiphost, i);
// Stores the caller's two address-list pointers in the state:
// available_ips into ->available_public_ips and known_ips into
// ->known_public_ips.  Only the pointers are assigned; nothing is copied.
// Other functions in this file index both arrays by node number, from 0 up
// to ipalloc_state->num - 1.
260 void ipalloc_set_public_ips(struct ipalloc_state *ipalloc_state,
261 struct ctdb_public_ip_list *known_ips,
262 struct ctdb_public_ip_list *available_ips)
264 ipalloc_state->available_public_ips = available_ips;
265 ipalloc_state->known_public_ips = known_ips;
// ipalloc_can_host_ips(): first pass looks at known_public_ips[i] for every
// node i, noting whether any node knows an address at all and checking
// whether some address is recorded as hosted by the node reporting it
// (pnn == i).  Second pass checks whether any node has a non-empty
// available_public_ips[i] list.
//
// NOTE(review): this excerpt has gaps.  The declarations of i and j, the
// statement that presumably sets have_ips, every return statement and any
// check between the two passes are not visible.  The two multi-line
// comments below have no visible terminator either; as shown, each runs on
// to the next terminator and hides the code in between.
268 /* This can only return false if there are no available IPs *and*
269 * there are no IP addresses currently allocated. If the latter is
270 * true then the cluster can clearly host IPs... just not necessarily
272 bool ipalloc_can_host_ips(struct ipalloc_state *ipalloc_state)
275 bool have_ips = false;
277 for (i=0; i < ipalloc_state->num; i++) {
278 struct ctdb_public_ip_list *ips =
279 ipalloc_state->known_public_ips;
280 if (ips[i].num != 0) {
283 /* Succeed if an address is hosted on node i */
284 for (j=0; j < ips[i].num; j++) {
285 if (ips[i].ip[j].pnn == i) {
296 /* At this point there are known addresses but none are
297 * hosted. Need to check if cluster can now host some
300 for (i=0; i < ipalloc_state->num; i++) {
301 if (ipalloc_state->available_public_ips[i].num != 0) {
309 /* The calculation part of the IP allocation algorithm. */
/*
 * Steps: build the merged address list into ipalloc_state->all_ips with
 * create_merged_ip_list(), fill the per-address bitmaps with
 * populate_bitmap(), then run the allocator selected by
 * ipalloc_state->algorithm.  Returns ipalloc_state->all_ips when the
 * allocator's result (ret) is true and NULL otherwise.
 *
 * NOTE(review): this excerpt has gaps.  The declaration of ret, the bodies
 * of the two early failure branches, the case label in front of the
 * ipalloc_lcp2() call and the break statements are not visible.  Whether
 * ret has a defined value when no case matches cannot be told from here.
 * The final multi-line comment also has no visible terminator.
 */
310 struct public_ip_list *ipalloc(struct ipalloc_state *ipalloc_state)
314 ipalloc_state->all_ips = create_merged_ip_list(ipalloc_state);
315 if (ipalloc_state->all_ips == NULL) {
319 if (!populate_bitmap(ipalloc_state)) {
/* Dispatch to the configured allocation algorithm. */
323 switch (ipalloc_state->algorithm) {
325 ret = ipalloc_lcp2(ipalloc_state);
327 case IPALLOC_DETERMINISTIC:
328 ret = ipalloc_deterministic(ipalloc_state);
330 case IPALLOC_NONDETERMINISTIC:
331 ret = ipalloc_nondeterministic(ipalloc_state);
335 /* at this point ->pnn is the node which will own each IP
336 or -1 if there is no node that can cover this ip
339 return (ret ? ipalloc_state->all_ips : NULL);