ctdb-tools: Add debug to ctdb_killtcp
[metze/samba/wip.git] / ctdb / tools / ctdb_killtcp.c
1 /*
2    CTDB TCP connection killing utility
3
4    Copyright (C) Martin Schwenke <martin@meltin.net> 2016
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <talloc.h>
21 #include <tevent.h>
22
23 #include "replace.h"
24 #include "system/network.h"
25
26 #include "lib/util/debug.h"
27 #include "lib/util/tevent_unix.h"
28
29 #include "protocol/protocol.h"
30 #include "protocol/protocol_util.h"
31
32 #include "common/db_hash.h"
33 #include "common/system.h"
34 #include "common/logging.h"
35
36
/*
 * Per-request state for reset_connections_send()/_recv()
 */
struct reset_connections_state {
        /* Event context used to schedule the periodic batch timer */
        struct tevent_context *ev;

        /*
         * Capture socket: the descriptor, the fd event watching it
         * for reads and the opaque handle filled in by
         * ctdb_sys_open_capture_socket()
         */
        int capture_fd;
        struct tevent_fd *fde;
        void *private_data;

        /* Connections still to be reset; the connection is the hash key */
        struct db_hash_context *connections;

        /* Number of batches run so far and the limit before giving up */
        unsigned int attempts;
        unsigned int max_attempts;

        /* Delay between batches */
        struct timeval retry_interval;

        /* Entries visited in the current batch and the per-batch limit */
        unsigned int batch_count;
        unsigned int batch_size;
};
49
50
/*
 * Forward declarations for the callbacks registered by
 * reset_connections_send() and reset_connections_batch()
 */

/* fd event handler for read events on the capture socket */
static void reset_connections_capture_tcp_handler(
                                        struct tevent_context *ev,
                                        struct tevent_fd *fde,
                                        uint16_t flags,
                                        void *private_data);

/* Wakeup callback that runs one batch of tickle ACKs */
static void reset_connections_batch(struct tevent_req *subreq);

/* Hash traverse callback that sends a tickle ACK for one connection */
static int reset_connections_tickle_connection(uint8_t *keybuf,
                                               size_t keylen,
                                               uint8_t *databuf,
                                               size_t datalen,
                                               void *private_data);
60
61 static struct tevent_req *reset_connections_send(
62                               TALLOC_CTX *mem_ctx,
63                               struct tevent_context *ev,
64                               const char *iface,
65                               struct ctdb_connection_list *conn_list)
66 {
67         struct tevent_req *req, *subreq;
68         struct reset_connections_state *state;
69         int i, ret;
70
71         req = tevent_req_create(mem_ctx, &state,
72                                 struct reset_connections_state);
73         if (req == NULL) {
74                 return NULL;
75         }
76
77         state->ev = ev;
78
79         if (conn_list->num == 0) {
80                 /* No connections, done! */
81                 tevent_req_done(req);
82                 return tevent_req_post(req, ev);
83         }
84
85         ret = db_hash_init(state, "connections", 2048, DB_HASH_SIMPLE,
86                            &state->connections);
87         if (ret != 0) {
88                 D_ERR("Failed to initialise connection hash (%s)\n",
89                       strerror(ret));
90                 tevent_req_error(req, ret);
91                 return tevent_req_post(req, ev);
92         }
93
94         DBG_DEBUG("Adding %u connections to hash\n", conn_list->num);
95         for (i = 0; i < conn_list->num; i++) {
96                 struct ctdb_connection *c = &conn_list->conn[i];
97
98                 DBG_DEBUG("Adding connection to hash: %s\n",
99                           ctdb_connection_to_string(conn_list, c, true));
100
101                 /* Connection is stored as a key in the connections hash */
102                 ret = db_hash_add(state->connections,
103                                   (uint8_t *)discard_const(c), sizeof(*c),
104                                   NULL, 0);
105                 if (ret != 0) {
106                         D_ERR("Error adding connection to hash (%s)\n",
107                               strerror(ret));
108                         tevent_req_error(req, ret);
109                         return tevent_req_post(req, ev);
110                 }
111         }
112
113         state->attempts = 0;
114         state->max_attempts = 50;
115
116         state->retry_interval.tv_sec = 0;
117         state->retry_interval.tv_usec = 100 * 1000;
118
119         state->batch_count = 0;
120         state->batch_size = 300;
121
122         state->capture_fd =
123                 ctdb_sys_open_capture_socket(iface, &state->private_data);
124         if (state->capture_fd == -1) {
125                 D_ERR("Failed to open capture socket on iface '%s' (%s)\n",
126                       iface, strerror(errno));
127                         tevent_req_error(req, EIO);
128                         return tevent_req_post(req, ev);
129         }
130
131         state->fde = tevent_add_fd(ev, state, state->capture_fd,
132                                    TEVENT_FD_READ,
133                                    reset_connections_capture_tcp_handler,
134                                    state);
135         if (tevent_req_nomem(state->fde, req)) {
136                 return tevent_req_post(req, ev);
137         }
138         tevent_fd_set_auto_close(state->fde);
139
140         subreq = tevent_wakeup_send(state, ev, tevent_timeval_current_ofs(0,0));
141         if (tevent_req_nomem(subreq, req)) {
142                 return tevent_req_post(req, ev);
143         }
144         tevent_req_set_callback(subreq, reset_connections_batch, req);
145
146         return req;
147 }
148
149 /*
150   called when we get a read event on the raw socket
151  */
152 static void reset_connections_capture_tcp_handler(struct tevent_context *ev,
153                                                   struct tevent_fd *fde,
154                                                   uint16_t flags,
155                                                   void *private_data)
156 {
157         struct reset_connections_state *state = talloc_get_type_abort(
158                 private_data, struct reset_connections_state);
159         /* 0 the parts that don't get set by ctdb_sys_read_tcp_packet */
160         struct ctdb_connection conn;
161         uint32_t ack_seq, seq;
162         int rst;
163         uint16_t window;
164         int ret;
165
166         ret = ctdb_sys_read_tcp_packet(state->capture_fd,
167                                        state->private_data,
168                                        &conn.server, &conn.client,
169                                        &ack_seq, &seq, &rst, &window);
170         if (ret != 0) {
171                 /* probably a non-tcp ACK packet */
172                 return;
173         }
174
175         if (window == htons(1234) && (rst || seq == 0)) {
176                 /* Ignore packets that we sent! */
177                 D_DEBUG("Ignoring packet: %s, "
178                         "seq=%"PRIu32", ack_seq=%"PRIu32", "
179                         "rst=%d, window=%"PRIu16"\n",
180                         ctdb_connection_to_string(state, &conn, false),
181                         seq, ack_seq, rst, ntohs(window));
182                 return;
183         }
184
185         /* Check if this connection is one being reset, if found then delete */
186         ret = db_hash_delete(state->connections,
187                              (uint8_t*)&conn, sizeof(conn));
188         if (ret == ENOENT) {
189                 /* Packet for some other connection, ignore */
190                 DBG_DEBUG("Ignoring packet for unknown connection: %s\n",
191                           ctdb_connection_to_string(state, &conn, true));
192                 return;
193         }
194         if (ret != 0) {
195                 DBG_WARNING("Internal error (%s)\n", strerror(ret));
196                 return;
197         }
198
199         D_INFO("Sending a TCP RST to for connection %s\n",
200                ctdb_connection_to_string(state, &conn, true));
201
202         ret = ctdb_sys_send_tcp(&conn.server, &conn.client, ack_seq, seq, 1);
203         if (ret != 0) {
204                 DBG_ERR("Error sending TCP RST for connection\n");
205         }
206 }
207
208 /*
209  * Called periodically until all sentenced connections have been reset
210  * or enough attempts have been made
211  */
212 static void reset_connections_batch(struct tevent_req *subreq)
213 {
214         struct tevent_req *req = tevent_req_callback_data(
215                 subreq, struct tevent_req);
216         struct reset_connections_state *state = tevent_req_data(
217                 req, struct reset_connections_state);
218         bool status;
219         int count, ret;
220
221         status = tevent_wakeup_recv(subreq);
222         TALLOC_FREE(subreq);
223
224         if (! status) {
225                 DBG_WARNING("Unexpected error on timer expiry\n");
226                 /* Keep going... */
227         }
228
229         /* loop over up to batch_size connections sending tickle ACKs */
230         state->batch_count = 0;
231         ret = db_hash_traverse(state->connections,
232                                reset_connections_tickle_connection,
233                                state, NULL);
234         if (ret != 0) {
235                 DBG_WARNING("Unexpected error traversing connections (%s)\n",
236                             strerror(ret));
237         }
238
239         state->attempts++;
240
241         /*
242          * If there are no more connections to kill or we have tried
243          * too many times we're finished
244          */
245         ret = db_hash_traverse(state->connections, NULL, NULL, &count);
246         if (ret != 0) {
247                 /* What now?  Try again until max_attempts reached */
248                 DBG_WARNING("Unexpected error traversing connections (%s)\n",
249                             strerror(ret));
250                 count = 1;
251         }
252         if (count == 0 ||
253             state->attempts >= state->max_attempts) {
254                 tevent_req_done(req);
255                 return;
256         }
257
258         /* Schedule next attempt */
259         subreq = tevent_wakeup_send(state, state->ev,
260                                     tevent_timeval_current_ofs(
261                                             state->retry_interval.tv_sec,
262                                             state->retry_interval.tv_usec));
263         if (tevent_req_nomem(subreq, req)) {
264                 return;
265         }
266         tevent_req_set_callback(subreq, reset_connections_batch, req);
267 }
268
269 static int reset_connections_tickle_connection(
270                                         uint8_t *keybuf, size_t keylen,
271                                         uint8_t *databuf, size_t datalen,
272                                         void *private_data)
273 {
274         struct reset_connections_state *state = talloc_get_type_abort(
275                 private_data, struct reset_connections_state);
276         struct ctdb_connection *conn;
277         int ret;
278
279         if (keylen != sizeof(*conn)) {
280                 DBG_WARNING("Unexpected data in connection hash\n");
281                 return 0;
282         }
283
284         conn = (struct ctdb_connection *)keybuf;
285
286         state->batch_count++;
287         if (state->batch_count > state->batch_size) {
288                 /* Terminate the traverse */
289                 return 1;
290         }
291
292         DBG_DEBUG("Sending tickle ACK for connection '%s'\n",
293                   ctdb_connection_to_string(state, conn, true));
294         ret = ctdb_sys_send_tcp(&conn->server, &conn->client, 0, 0, 0);
295         if (ret != 0) {
296                 DBG_ERR("Error sending tickle ACK\n");
297                 /* continue */
298         }
299
300         return 0;
301 }
302
/*
 * Collect the result of a reset_connections request
 *
 * Returns true if the request did not fail.  Otherwise returns false
 * and, if perr is not NULL, stores the error code in *perr.
 */
static bool reset_connections_recv(struct tevent_req *req, int *perr)
{
        int unix_err;

        if (! tevent_req_is_unix_error(req, &unix_err)) {
                return true;
        }

        if (perr != NULL) {
                *perr = unix_err;
        }
        return false;
}
316
/*
 * Print a usage message to stdout and exit with status 1
 */
static void usage(const char *prog)
{
        fprintf(stdout,
                "usage: %s <interface> [ <srcip:port> <dstip:port> ]\n",
                prog);
        exit(1);
}
322
323 int main(int argc, char **argv)
324 {
325         struct ctdb_connection conn;
326         struct tevent_context *ev = NULL;
327         struct TALLOC_CONTEXT *mem_ctx = NULL;
328         struct ctdb_connection_list *conn_list = NULL;
329         const char *t;
330         struct tevent_req *req;
331         int debug_level;
332         bool status;
333         int ret;
334
335         /* Set the debug level */
336         t = getenv("CTDB_DEBUGLEVEL");
337         if (t != NULL) {
338                 if (debug_level_parse(t, &debug_level)) {
339                         DEBUGLEVEL = debug_level;
340                 } else {
341                         DEBUGLEVEL = DEBUG_ERR;
342                 }
343         }
344
345         if (argc != 2 && argc != 4) {
346                 usage(argv[0]);
347         }
348
349         if (argc == 4) {
350                 ret = ctdb_sock_addr_from_string(argv[2], &conn.client, true);
351                 if (ret != 0) {
352                         D_ERR("Bad IP:port '%s'\n", argv[2]);
353                         goto fail;
354                 }
355
356                 ret = ctdb_sock_addr_from_string(argv[3], &conn.server, true);
357                 if (ret != 0) {
358                         D_ERR("Bad IP:port '%s'\n", argv[3]);
359                         goto fail;
360                 }
361
362
363                 conn_list = talloc_zero(mem_ctx, struct ctdb_connection_list);
364                 if (conn_list == NULL) {
365                         ret = ENOMEM;
366                         DBG_ERR("Internal error (%s)\n", strerror(ret));
367                         goto fail;
368                 }
369                 ret = ctdb_connection_list_add(conn_list, &conn);
370                 if (ret != 0) {
371                         DBG_ERR("Internal error (%s)\n", strerror(ret));
372                         goto fail;
373                 }
374         } else {
375                 ret = ctdb_connection_list_read(mem_ctx, true, &conn_list);
376                 if (ret != 0) {
377                         D_ERR("Unable to parse connections (%s)\n",
378                               strerror(ret));
379                         goto fail;
380                 }
381         }
382
383         mem_ctx = talloc_new(NULL);
384         if (mem_ctx == NULL) {
385                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
386                 goto fail;
387         }
388
389         ev = tevent_context_init(mem_ctx);
390         if (ev == NULL) {
391                 DEBUG(DEBUG_ERR, ("Failed to initialise tevent\n"));
392                 goto fail;
393         }
394
395         req = reset_connections_send(mem_ctx, ev, argv[1], conn_list);
396         if (req == NULL) {
397                 goto fail;
398         }
399
400         tevent_req_poll(req, ev);
401
402         status = reset_connections_recv(req, &ret);
403         if (! status) {
404                 D_ERR("Failed to kill connections (%s)\n", strerror(ret));
405                 goto fail;
406         }
407
408         talloc_free(mem_ctx);
409
410         return 0;
411
412 fail:
413         TALLOC_FREE(mem_ctx);
414         return -1;
415 }