s4:irpc/tests: make explicit use of the top level event context
[bbaumbach/samba-autobuild/.git] / ctdb / server / ctdb_update_record.c
1 /* 
2    implementation of the update record control
3
4    Copyright (C) Andrew Tridgell  2007
5    Copyright (C) Ronnie Sahlberg  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22 #include "db_wrap.h"
23 #include "tdb.h"
24 #include "ctdb_private.h"
25
/*
  state carried through one update-record control while a child
  process writes the marshalled records to the local database
 */
struct ctdb_persistent_write_state {
        /* database the records are written to */
        struct ctdb_db_context *ctdb_db;

        /* marshalled records to store */
        struct ctdb_marshall_buffer *m;

        /* control request that is replied to once the write is done */
        struct ctdb_req_control *c;

        /* UPDATE_FLAGS_* bits */
        uint32_t flags;
};
32
/* only replace records that already exist locally; never create new ones */
#define UPDATE_FLAGS_REPLACE_ONLY       1
35
36 /*
37   called from a child process to write the data
38  */
39 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
40 {
41         int ret, i;
42         struct ctdb_rec_data *rec = NULL;
43         struct ctdb_marshall_buffer *m = state->m;
44
45         ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
46         if (ret == -1) {
47                 DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
48                                  state->ctdb_db->db_id));
49                 return -1;
50         }
51
52         for (i=0;i<m->count;i++) {
53                 struct ctdb_ltdb_header oldheader;
54                 struct ctdb_ltdb_header header;
55                 TDB_DATA key, data, olddata;
56                 TALLOC_CTX *tmp_ctx = talloc_new(state);
57
58                 rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
59
60                 if (rec == NULL) {
61                         DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
62                                          i, state->ctdb_db->db_id));
63                         talloc_free(tmp_ctx);
64                         goto failed;
65                 }
66
67                 /* we must check if the record exists or not because
68                    ctdb_ltdb_fetch will unconditionally create a record
69                  */
70                 if (state->flags & UPDATE_FLAGS_REPLACE_ONLY) {
71                         TDB_DATA trec;
72                         trec = tdb_fetch(state->ctdb_db->ltdb->tdb, key);
73                         if (trec.dsize == 0) {
74                                 talloc_free(tmp_ctx);
75                                 continue;
76                         }
77                         free(trec.dptr);
78                 }
79
80                 /* fetch the old header and ensure the rsn is less than the new rsn */
81                 ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
82                 if (ret != 0) {
83                         DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
84                                          state->ctdb_db->db_id));
85                         talloc_free(tmp_ctx);
86                         goto failed;
87                 }
88
89                 if (oldheader.rsn >= header.rsn &&
90                     (olddata.dsize != data.dsize ||
91                      memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
92                         DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
93                                           state->ctdb_db->db_id,
94                                           (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
95                         talloc_free(tmp_ctx);
96                         goto failed;
97                 }
98
99                 talloc_free(tmp_ctx);
100
101                 ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
102                 if (ret != 0) {
103                         DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
104                                           state->ctdb_db->db_id));
105                         goto failed;
106                 }
107         }
108
109         ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
110         if (ret == -1) {
111                 DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
112                                  state->ctdb_db->db_id));
113                 return -1;
114         }
115
116         return 0;
117
118 failed:
119         tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
120         return -1;
121 }
122
123
124 /*
125   called when we the child has completed the persistent write
126   on our behalf
127  */
128 static void ctdb_persistent_write_callback(int status, void *private_data)
129 {
130         struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
131                                                                    struct ctdb_persistent_write_state);
132
133
134         ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
135
136         talloc_free(state);
137 }
138
139 /*
140   called if our lockwait child times out
141  */
142 static void ctdb_persistent_lock_timeout(struct event_context *ev, struct timed_event *te,
143                                          struct timeval t, void *private_data)
144 {
145         struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
146                                                                    struct ctdb_persistent_write_state);
147         ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
148         talloc_free(state);
149 }
150
/* bookkeeping for one forked child that writes records on our behalf */
struct childwrite_handle {
        /* daemon context, used for statistics and for killing the child */
        struct ctdb_context *ctdb;

        /* database the child writes to */
        struct ctdb_db_context *ctdb_db;

        /* read event registered on fd[0] */
        struct fd_event *fde;

        /* pipe: the child writes its status byte to fd[1],
           the parent reads it from fd[0] */
        int fd[2];

        /* pid of the forked writer */
        pid_t child;

        /* opaque pointer handed back to callback */
        void *private_data;

        /* invoked with the status byte once the child has reported */
        void (*callback)(int, void *);

        /* when the handle was set up, for the latency statistics */
        struct timeval start_time;
};
161
162 static int childwrite_destructor(struct childwrite_handle *h)
163 {
164         CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
165         ctdb_kill(h->ctdb, h->child, SIGKILL);
166         return 0;
167 }
168
169 /* called when the child process has finished writing the record to the
170    database
171 */
172 static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
173                              uint16_t flags, void *private_data)
174 {
175         struct childwrite_handle *h = talloc_get_type(private_data,
176                                                      struct childwrite_handle);
177         void *p = h->private_data;
178         void (*callback)(int, void *) = h->callback;
179         pid_t child = h->child;
180         TALLOC_CTX *tmp_ctx = talloc_new(ev);
181         int ret;
182         char c;
183
184         CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
185         CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
186
187         /* the handle needs to go away when the context is gone - when
188            the handle goes away this implicitly closes the pipe, which
189            kills the child */
190         talloc_steal(tmp_ctx, h);
191
192         talloc_set_destructor(h, NULL);
193
194         ret = read(h->fd[0], &c, 1);
195         if (ret < 1) {
196                 DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
197                 c = 1;
198         }
199
200         callback(c, p);
201
202         ctdb_kill(h->ctdb, child, SIGKILL);
203         talloc_free(tmp_ctx);
204 }
205
206 /* this creates a child process which will take out a tdb transaction
207    and write the record to the database.
208 */
209 static struct childwrite_handle *ctdb_childwrite(
210                                 struct ctdb_db_context *ctdb_db,
211                                 void (*callback)(int, void *private_data),
212                                 struct ctdb_persistent_write_state *state)
213 {
214         struct childwrite_handle *result;
215         int ret;
216         pid_t parent = getpid();
217
218         CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
219         CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
220
221         if (!(result = talloc_zero(state, struct childwrite_handle))) {
222                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
223                 return NULL;
224         }
225
226         ret = pipe(result->fd);
227
228         if (ret != 0) {
229                 talloc_free(result);
230                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
231                 return NULL;
232         }
233
234         result->child = ctdb_fork(ctdb_db->ctdb);
235
236         if (result->child == (pid_t)-1) {
237                 close(result->fd[0]);
238                 close(result->fd[1]);
239                 talloc_free(result);
240                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
241                 return NULL;
242         }
243
244         result->callback = callback;
245         result->private_data = state;
246         result->ctdb = ctdb_db->ctdb;
247         result->ctdb_db = ctdb_db;
248
249         if (result->child == 0) {
250                 char c = 0;
251
252                 close(result->fd[0]);
253                 ctdb_set_process_name("ctdb_write_persistent");
254                 debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
255                 ret = ctdb_persistent_store(state);
256                 if (ret != 0) {
257                         DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
258                         c = 1;
259                 }
260
261                 write(result->fd[1], &c, 1);
262
263                 /* make sure we die when our parent dies */
264                 while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
265                         sleep(5);
266                 }
267                 _exit(0);
268         }
269
270         close(result->fd[1]);
271         set_close_on_exec(result->fd[0]);
272
273         talloc_set_destructor(result, childwrite_destructor);
274
275         DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
276
277         result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
278                                    EVENT_FD_READ, childwrite_handler,
279                                    (void *)result);
280         if (result->fde == NULL) {
281                 talloc_free(result);
282                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
283                 return NULL;
284         }
285         tevent_fd_set_auto_close(result->fde);
286
287         result->start_time = timeval_current();
288
289         return result;
290 }
291
292 /*
293    update a record on this node if the new record has a higher rsn than the
294    current record
295  */
296 int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
297                                    struct ctdb_req_control *c, TDB_DATA recdata,
298                                    bool *async_reply)
299 {
300         struct ctdb_db_context *ctdb_db;
301         struct ctdb_persistent_write_state *state;
302         struct childwrite_handle *handle;
303         struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
304
305         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
306                 DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
307                 return -1;
308         }
309
310         ctdb_db = find_ctdb_db(ctdb, m->db_id);
311         if (ctdb_db == NULL) {
312                 DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
313                 return -1;
314         }
315
316         if (ctdb_db->unhealthy_reason) {
317                 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
318                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
319                 return -1;
320         }
321
322         state = talloc(ctdb, struct ctdb_persistent_write_state);
323         CTDB_NO_MEMORY(ctdb, state);
324
325         state->ctdb_db = ctdb_db;
326         state->c       = c;
327         state->m       = m;
328         state->flags   = 0;
329         if (!ctdb_db->persistent) {
330                 state->flags   = UPDATE_FLAGS_REPLACE_ONLY;
331         }
332
333         /* create a child process to take out a transaction and
334            write the data.
335         */
336         handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
337         if (handle == NULL) {
338                 DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
339                 talloc_free(state);
340                 return -1;
341         }
342
343         /* we need to wait for the replies */
344         *async_reply = true;
345
346         /* need to keep the control structure around */
347         talloc_steal(state, c);
348
349         /* but we won't wait forever */
350         event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.control_timeout, 0),
351                         ctdb_persistent_lock_timeout, state);
352
353         return 0;
354 }
355