ctdb-daemon: Stop using tevent compatibility definitions
[vlendec/samba-autobuild/.git] / ctdb / server / ctdb_update_record.c
1 /* 
2    implementation of the update record control
3
4    Copyright (C) Andrew Tridgell  2007
5    Copyright (C) Ronnie Sahlberg  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22 #include "lib/tdb_wrap/tdb_wrap.h"
23 #include "tdb.h"
24 #include "ctdb_private.h"
25 #include "common/system.h"
26
/*
  state for one update-record control: used by the parent to send the
  reply and by the forked writer child to store the records
 */
struct ctdb_persistent_write_state {
        /* database the records are written to */
        struct ctdb_db_context *ctdb_db;
        /* marshalled records to store */
        struct ctdb_marshall_buffer *m;
        /* control request that is replied to once the write is done */
        struct ctdb_req_control *c;
        /* UPDATE_FLAGS_* bits */
        uint32_t flags;
};

/* do not create records that do not already exist locally; only
   replace existing ones */
#define UPDATE_FLAGS_REPLACE_ONLY       1
36
37 /*
38   called from a child process to write the data
39  */
40 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
41 {
42         int ret, i;
43         struct ctdb_rec_data *rec = NULL;
44         struct ctdb_marshall_buffer *m = state->m;
45
46         ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
47         if (ret == -1) {
48                 DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
49                                  state->ctdb_db->db_id));
50                 return -1;
51         }
52
53         for (i=0;i<m->count;i++) {
54                 struct ctdb_ltdb_header oldheader;
55                 struct ctdb_ltdb_header header;
56                 TDB_DATA key, data, olddata;
57                 TALLOC_CTX *tmp_ctx = talloc_new(state);
58
59                 rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
60
61                 if (rec == NULL) {
62                         DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
63                                          i, state->ctdb_db->db_id));
64                         talloc_free(tmp_ctx);
65                         goto failed;
66                 }
67
68                 /* we must check if the record exists or not because
69                    ctdb_ltdb_fetch will unconditionally create a record
70                  */
71                 if (state->flags & UPDATE_FLAGS_REPLACE_ONLY) {
72                         TDB_DATA trec;
73                         trec = tdb_fetch(state->ctdb_db->ltdb->tdb, key);
74                         if (trec.dsize == 0) {
75                                 talloc_free(tmp_ctx);
76                                 continue;
77                         }
78                         free(trec.dptr);
79                 }
80
81                 /* fetch the old header and ensure the rsn is less than the new rsn */
82                 ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
83                 if (ret != 0) {
84                         DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
85                                          state->ctdb_db->db_id));
86                         talloc_free(tmp_ctx);
87                         goto failed;
88                 }
89
90                 if (oldheader.rsn >= header.rsn &&
91                     (olddata.dsize != data.dsize ||
92                      memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
93                         DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
94                                           state->ctdb_db->db_id,
95                                           (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
96                         talloc_free(tmp_ctx);
97                         goto failed;
98                 }
99
100                 talloc_free(tmp_ctx);
101
102                 ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
103                 if (ret != 0) {
104                         DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
105                                           state->ctdb_db->db_id));
106                         goto failed;
107                 }
108         }
109
110         ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
111         if (ret == -1) {
112                 DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
113                                  state->ctdb_db->db_id));
114                 return -1;
115         }
116
117         return 0;
118
119 failed:
120         tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
121         return -1;
122 }
123
124
125 /*
126   called when we the child has completed the persistent write
127   on our behalf
128  */
129 static void ctdb_persistent_write_callback(int status, void *private_data)
130 {
131         struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
132                                                                    struct ctdb_persistent_write_state);
133
134
135         ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
136
137         talloc_free(state);
138 }
139
140 /*
141   called if our lockwait child times out
142  */
143 static void ctdb_persistent_lock_timeout(struct tevent_context *ev,
144                                          struct tevent_timer *te,
145                                          struct timeval t, void *private_data)
146 {
147         struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
148                                                                    struct ctdb_persistent_write_state);
149         ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
150         talloc_free(state);
151 }
152
/*
  bookkeeping for one forked writer child
 */
struct childwrite_handle {
        /* daemon context, used for statistics and for killing the child */
        struct ctdb_context *ctdb;
        /* database the child writes to */
        struct ctdb_db_context *ctdb_db;
        /* fd event watching the read end of the pipe */
        struct tevent_fd *fde;
        /* pipe: the parent reads fd[0], the child writes fd[1] */
        int fd[2];
        /* pid of the writer child */
        pid_t child;
        /* argument handed to callback */
        void *private_data;
        /* invoked with the status byte read from the child */
        void (*callback)(int status, void *private_data);
        /* when the write was started, for latency statistics */
        struct timeval start_time;
};
163
164 static int childwrite_destructor(struct childwrite_handle *h)
165 {
166         CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
167         ctdb_kill(h->ctdb, h->child, SIGKILL);
168         return 0;
169 }
170
171 /* called when the child process has finished writing the record to the
172    database
173 */
174 static void childwrite_handler(struct tevent_context *ev,
175                                struct tevent_fd *fde,
176                                uint16_t flags, void *private_data)
177 {
178         struct childwrite_handle *h = talloc_get_type(private_data,
179                                                      struct childwrite_handle);
180         void *p = h->private_data;
181         void (*callback)(int, void *) = h->callback;
182         pid_t child = h->child;
183         TALLOC_CTX *tmp_ctx = talloc_new(ev);
184         int ret;
185         char c;
186
187         CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
188         CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
189
190         /* the handle needs to go away when the context is gone - when
191            the handle goes away this implicitly closes the pipe, which
192            kills the child */
193         talloc_steal(tmp_ctx, h);
194
195         talloc_set_destructor(h, NULL);
196
197         ret = sys_read(h->fd[0], &c, 1);
198         if (ret < 1) {
199                 DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
200                 c = 1;
201         }
202
203         callback(c, p);
204
205         ctdb_kill(h->ctdb, child, SIGKILL);
206         talloc_free(tmp_ctx);
207 }
208
209 /* this creates a child process which will take out a tdb transaction
210    and write the record to the database.
211 */
212 static struct childwrite_handle *ctdb_childwrite(
213                                 struct ctdb_db_context *ctdb_db,
214                                 void (*callback)(int, void *private_data),
215                                 struct ctdb_persistent_write_state *state)
216 {
217         struct childwrite_handle *result;
218         int ret;
219         pid_t parent = getpid();
220
221         CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
222         CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
223
224         if (!(result = talloc_zero(state, struct childwrite_handle))) {
225                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
226                 return NULL;
227         }
228
229         ret = pipe(result->fd);
230
231         if (ret != 0) {
232                 talloc_free(result);
233                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
234                 return NULL;
235         }
236
237         result->child = ctdb_fork(ctdb_db->ctdb);
238
239         if (result->child == (pid_t)-1) {
240                 close(result->fd[0]);
241                 close(result->fd[1]);
242                 talloc_free(result);
243                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
244                 return NULL;
245         }
246
247         result->callback = callback;
248         result->private_data = state;
249         result->ctdb = ctdb_db->ctdb;
250         result->ctdb_db = ctdb_db;
251
252         if (result->child == 0) {
253                 char c = 0;
254
255                 close(result->fd[0]);
256                 ctdb_set_process_name("ctdb_write_persistent");
257                 debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
258                 ret = ctdb_persistent_store(state);
259                 if (ret != 0) {
260                         DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
261                         c = 1;
262                 }
263
264                 sys_write(result->fd[1], &c, 1);
265
266                 /* make sure we die when our parent dies */
267                 while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
268                         sleep(5);
269                 }
270                 _exit(0);
271         }
272
273         close(result->fd[1]);
274         set_close_on_exec(result->fd[0]);
275
276         talloc_set_destructor(result, childwrite_destructor);
277
278         DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
279
280         result->fde = tevent_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
281                                     TEVENT_FD_READ, childwrite_handler,
282                                     (void *)result);
283         if (result->fde == NULL) {
284                 talloc_free(result);
285                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
286                 return NULL;
287         }
288         tevent_fd_set_auto_close(result->fde);
289
290         result->start_time = timeval_current();
291
292         return result;
293 }
294
295 /*
296    update a record on this node if the new record has a higher rsn than the
297    current record
298  */
299 int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
300                                    struct ctdb_req_control *c, TDB_DATA recdata,
301                                    bool *async_reply)
302 {
303         struct ctdb_db_context *ctdb_db;
304         struct ctdb_persistent_write_state *state;
305         struct childwrite_handle *handle;
306         struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
307
308         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
309                 DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
310                 return -1;
311         }
312
313         ctdb_db = find_ctdb_db(ctdb, m->db_id);
314         if (ctdb_db == NULL) {
315                 DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
316                 return -1;
317         }
318
319         if (ctdb_db->unhealthy_reason) {
320                 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
321                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
322                 return -1;
323         }
324
325         state = talloc(ctdb, struct ctdb_persistent_write_state);
326         CTDB_NO_MEMORY(ctdb, state);
327
328         state->ctdb_db = ctdb_db;
329         state->c       = c;
330         state->m       = m;
331         state->flags   = 0;
332         if (!ctdb_db->persistent) {
333                 state->flags   = UPDATE_FLAGS_REPLACE_ONLY;
334         }
335
336         /* create a child process to take out a transaction and
337            write the data.
338         */
339         handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
340         if (handle == NULL) {
341                 DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
342                 talloc_free(state);
343                 return -1;
344         }
345
346         /* we need to wait for the replies */
347         *async_reply = true;
348
349         /* need to keep the control structure around */
350         talloc_steal(state, c);
351
352         /* but we won't wait forever */
353         tevent_add_timer(ctdb->ev, state,
354                          timeval_current_ofs(ctdb->tunable.control_timeout, 0),
355                          ctdb_persistent_lock_timeout, state);
356
357         return 0;
358 }
359