ctdb-tools: Improve error handling
[samba.git] / ctdb / tools / ctdb_killtcp.c
1 /*
2    CTDB TCP connection killing utility
3
4    Copyright (C) Martin Schwenke <martin@meltin.net> 2016
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <talloc.h>
21 #include <tevent.h>
22
23 #include "replace.h"
24 #include "system/network.h"
25
26 #include "lib/util/debug.h"
27
28 #include "protocol/protocol.h"
29 #include "protocol/protocol_util.h"
30
31 #include "common/db_hash.h"
32 #include "common/system.h"
33 #include "common/logging.h"
34
35
/*
 * State for one kill run: the capture socket plus the set of TCP
 * connections that still need to be reset.
 */
struct ctdb_kill_tcp {
        /* Capture socket; -1 until ctdb_killtcp() has opened it */
        int capture_fd;
        /* tevent read event for capture_fd; NULL until registered */
        struct tevent_fd *fde;
        /* Hash whose keys are the struct ctdb_connection still to kill */
        struct db_hash_context *connections;
        /* Opaque data filled in by ctdb_sys_open_capture_socket() and
         * handed back to ctdb_sys_read_tcp_packet() */
        void *private_data;
        /* Points at the bool that the talloc destructor sets to true */
        void *destructor_data;

        /* Number of tickle rounds done so far, and the limit */
        unsigned int attempts;
        unsigned int max_attempts;
        /* Delay between tickle rounds */
        struct timeval retry_interval;

        /* Connections visited in the current round, and the per-round
         * limit */
        unsigned int batch_count;
        unsigned int batch_size;
};
50
51
52 /*
53   called when we get a read event on the raw socket
54  */
55 static void capture_tcp_handler(struct tevent_context *ev,
56                                 struct tevent_fd *fde,
57                                 uint16_t flags, void *private_data)
58 {
59         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
60         /* 0 the parts that don't get set by ctdb_sys_read_tcp_packet */
61         struct ctdb_connection conn;
62         uint32_t ack_seq, seq;
63         int rst;
64         uint16_t window;
65         int ret;
66
67         ret = ctdb_sys_read_tcp_packet(killtcp->capture_fd,
68                                        killtcp->private_data,
69                                        &conn.server, &conn.client,
70                                        &ack_seq, &seq, &rst, &window);
71         if (ret != 0) {
72                 /* probably a non-tcp ACK packet */
73                 return;
74         }
75
76         if (window == htons(1234) && (rst || seq == 0)) {
77                 /* Ignore packets that we sent! */
78                 D_DEBUG("Ignoring packet: %s, "
79                         "seq=%"PRIu32", ack_seq=%"PRIu32", "
80                         "rst=%d, window=%"PRIu16"\n",
81                         ctdb_connection_to_string(killtcp, &conn, false),
82                         seq, ack_seq, rst, ntohs(window));
83                 return;
84         }
85
86         /* Check if this connection is one being reset, if found then delete */
87         ret = db_hash_delete(killtcp->connections,
88                              (uint8_t*)&conn, sizeof(conn));
89         if (ret == ENOENT) {
90                 /* Packet for some other connection, ignore */
91                 return;
92         }
93         if (ret != 0) {
94                 DBG_WARNING("Internal error (%s)\n", strerror(ret));
95                 return;
96         }
97
98         D_INFO("Sending a TCP RST to kill connection %s\n",
99                ctdb_connection_to_string(killtcp, &conn, true));
100
101         ret = ctdb_sys_send_tcp(&conn.server, &conn.client, ack_seq, seq, 1);
102         if (ret != 0) {
103                 DBG_ERR("Error sending TCP RST for connection\n");
104         }
105 }
106
107
108 static int tickle_connection_parser(uint8_t *keybuf, size_t keylen,
109                                     uint8_t *databuf, size_t datalen,
110                                     void *private_data)
111 {
112         struct ctdb_kill_tcp *killtcp = talloc_get_type_abort(
113                 private_data, struct ctdb_kill_tcp);
114         struct ctdb_connection *conn;
115         int ret;
116
117         if (keylen != sizeof(*conn)) {
118                 DBG_WARNING("Unexpected data in connection hash\n");
119                 return 0;
120         }
121
122         conn = (struct ctdb_connection *)keybuf;
123
124         killtcp->batch_count++;
125         if (killtcp->batch_count > killtcp->batch_size) {
126                 /* Terminate the traverse */
127                 return 1;
128         }
129
130         ret = ctdb_sys_send_tcp(&conn->server, &conn->client, 0, 0, 0);
131         if (ret != 0) {
132                 DBG_ERR("Error sending tickle ACK\n");
133                 /* continue */
134         }
135
136         return 0;
137 }
138
139 /*
140  * Called periodically until all sentenced connections have been reset
141  * or enough attempts have been made
142  */
143 static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
144                                               struct tevent_timer *te,
145                                               struct timeval t, void *private_data)
146 {
147         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
148         int count, ret;
149
150         /* loop over up to batch_size connections sending tickle ACKs */
151         killtcp->batch_count = 0;
152         ret = db_hash_traverse(killtcp->connections,
153                                tickle_connection_parser, killtcp, NULL);
154         if (ret != 0) {
155                 DBG_WARNING("Unexpected error traversing connections (%s)\n",
156                             strerror(ret));
157         }
158
159         killtcp->attempts++;
160
161         /*
162          * If there are no more connections to kill or we have tried
163          * too many times we can remove the entire killtcp structure
164          */
165         ret = db_hash_traverse(killtcp->connections, NULL, NULL, &count);
166         if (ret != 0) {
167                 /* What now?  Try again until max_attempts reached */
168                 DBG_WARNING("Unexpected error traversing connections (%s)\n",
169                             strerror(ret));
170                 count = 1;
171         }
172         if (count == 0 ||
173             killtcp->attempts >= killtcp->max_attempts) {
174                 talloc_free(killtcp);
175                 return;
176         }
177
178         /* try tickling them again in a seconds time
179          */
180         tevent_add_timer(ev, killtcp,
181                          tevent_timeval_current_ofs(
182                                  killtcp->retry_interval.tv_sec,
183                                  killtcp->retry_interval.tv_usec),
184                          ctdb_tickle_sentenced_connections, killtcp);
185 }
186
187 /* Add a TCP socket to the list of connections we want to RST.  The
188  * list is attached to *killtcp_arg.  If this is NULL then allocate
189  * the structure.  */
190 static int ctdb_killtcp(struct tevent_context *ev,
191                         TALLOC_CTX *mem_ctx,
192                         const char *iface,
193                         struct ctdb_connection *conn,
194                         struct ctdb_kill_tcp **killtcp_arg)
195 {
196         struct ctdb_kill_tcp *killtcp;
197         int ret;
198
199         if (killtcp_arg == NULL) {
200                 DEBUG(DEBUG_ERR, (__location__ " killtcp_arg is NULL!\n"));
201                 return -1;
202         }
203
204         killtcp = *killtcp_arg;
205
206         /* Allocate a new structure if necessary.  The structure is
207          * only freed when mem_ctx is freed. */
208         if (killtcp == NULL) {
209                 killtcp = talloc_zero(mem_ctx, struct ctdb_kill_tcp);
210                 if (killtcp == NULL) {
211                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
212                         return -1;
213                 }
214
215                 killtcp->capture_fd  = -1;
216                 ret = db_hash_init(killtcp, "connections", 2048, DB_HASH_SIMPLE,
217                                    &killtcp->connections);
218                 if (ret != 0) {
219                         D_ERR("Failed to initialise connection hash (%s)\n",
220                               strerror(ret));
221                         talloc_free(killtcp);
222                         return -1;
223                 }
224
225                 killtcp->attempts = 0;
226                 killtcp->max_attempts = 50;
227
228                 killtcp->retry_interval.tv_sec = 0;
229                 killtcp->retry_interval.tv_usec = 100 * 1000;
230
231                 killtcp->batch_count = 0;
232                 killtcp->batch_size = 300;
233
234                 *killtcp_arg = killtcp;
235         }
236
237         /* Connection is stored as a key in the connections hash */
238         ret = db_hash_add(killtcp->connections,
239                           (uint8_t *)conn, sizeof(*conn),
240                           NULL, 0);
241         if (ret != 0) {
242                 D_ERR("Error adding connection to hash (%s)\n", strerror(ret));
243                 return -1;
244         }
245
246         /*
247            If we don't have a socket to listen on yet we must create it
248          */
249         if (killtcp->capture_fd == -1) {
250                 killtcp->capture_fd =
251                         ctdb_sys_open_capture_socket(iface,
252                                                      &killtcp->private_data);
253                 if (killtcp->capture_fd == -1) {
254                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
255                                           "socket on iface '%s' for killtcp (%s)\n",
256                                           iface, strerror(errno)));
257                         return -1;
258                 }
259         }
260
261
262         if (killtcp->fde == NULL) {
263                 killtcp->fde = tevent_add_fd(ev, killtcp,
264                                              killtcp->capture_fd,
265                                              TEVENT_FD_READ,
266                                              capture_tcp_handler, killtcp);
267                 tevent_fd_set_auto_close(killtcp->fde);
268         }
269
270         return 0;
271 }
272
273 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
274 {
275         bool *done = killtcp->destructor_data;
276         *done = true;
277
278         return 0;
279 }
280
/* Print the command line synopsis to stdout and exit with status 1 */
static void usage(const char *prog)
{
        fprintf(stdout,
                "usage: %s <interface> [ <srcip:port> <dstip:port> ]\n",
                prog);
        exit(1);
}
286
287 int main(int argc, char **argv)
288 {
289         struct ctdb_connection conn;
290         struct ctdb_kill_tcp *killtcp = NULL;
291         struct tevent_context *ev = NULL;
292         struct TALLOC_CONTEXT *mem_ctx = NULL;
293         struct ctdb_connection_list *conn_list = NULL;
294         const char *t;
295         int debug_level;
296         bool done;
297         int i, ret;
298
299         /* Set the debug level */
300         t = getenv("CTDB_DEBUGLEVEL");
301         if (t != NULL) {
302                 if (debug_level_parse(t, &debug_level)) {
303                         DEBUGLEVEL = debug_level;
304                 } else {
305                         DEBUGLEVEL = DEBUG_ERR;
306                 }
307         }
308
309         if (argc != 2 && argc != 4) {
310                 usage(argv[0]);
311         }
312
313         if (argc == 4) {
314                 ret = ctdb_sock_addr_from_string(argv[2], &conn.client, true);
315                 if (ret != 0) {
316                         D_ERR("Bad IP:port '%s'\n", argv[2]);
317                         goto fail;
318                 }
319
320                 ret = ctdb_sock_addr_from_string(argv[3], &conn.server, true);
321                 if (ret != 0) {
322                         D_ERR("Bad IP:port '%s'\n", argv[3]);
323                         goto fail;
324                 }
325
326
327                 conn_list = talloc_zero(mem_ctx, struct ctdb_connection_list);
328                 if (conn_list == NULL) {
329                         ret = ENOMEM;
330                         DBG_ERR("Internal error (%s)\n", strerror(ret));
331                         goto fail;
332                 }
333                 ret = ctdb_connection_list_add(conn_list, &conn);
334                 if (ret != 0) {
335                         DBG_ERR("Internal error (%s)\n", strerror(ret));
336                         goto fail;
337                 }
338         } else {
339                 ret = ctdb_connection_list_read(mem_ctx, true, &conn_list);
340                 if (ret != 0) {
341                         D_ERR("Unable to parse connections (%s)\n",
342                               strerror(ret));
343                         goto fail;
344                 }
345         }
346
347         mem_ctx = talloc_new(NULL);
348         if (mem_ctx == NULL) {
349                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
350                 goto fail;
351         }
352
353         ev = tevent_context_init(mem_ctx);
354         if (ev == NULL) {
355                 DEBUG(DEBUG_ERR, ("Failed to initialise tevent\n"));
356                 goto fail;
357         }
358
359         if (conn_list->num == 0) {
360                 /* No connections, done! */
361                 talloc_free(mem_ctx);
362                 return 0;
363         }
364
365         for (i = 0; i < conn_list->num; i++) {
366                 ret = ctdb_killtcp(ev, mem_ctx, argv[1],
367                                    &conn_list->conn[i], &killtcp);
368                 if (ret != 0) {
369                         DEBUG(DEBUG_ERR, ("Unable to killtcp\n"));
370                         goto fail;
371                 }
372         }
373
374         done = false;
375         killtcp->destructor_data = &done;
376         talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
377
378         /* Do the initial processing of connections */
379         tevent_add_timer(ev, killtcp,
380                          tevent_timeval_current_ofs(0, 0),
381                          ctdb_tickle_sentenced_connections, killtcp);
382
383         while (!done) {
384                 tevent_loop_once(ev);
385         }
386
387         talloc_free(mem_ctx);
388
389         return 0;
390
391 fail:
392         TALLOC_FREE(mem_ctx);
393         return -1;
394 }