2 implementation of the update record control
4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tevent/tevent.h"
24 #include "lib/tdb/include/tdb.h"
25 #include "ctdb_private.h"
/*
  State carried through one update-record request.  It is allocated in
  ctdb_control_update_record() and handed to ctdb_childwrite(), to the
  completion callback and to the timeout handler.

  NOTE(review): the closing "};" of this struct is not visible in this
  listing (the embedded numbering jumps from 30 to 35).
 */
27 struct ctdb_persistent_write_state {
/* database whose local tdb is written by ctdb_persistent_store() */
28 struct ctdb_db_context *ctdb_db;
/* marshalled records that ctdb_persistent_store() iterates over */
29 struct ctdb_marshall_buffer *m;
/* control request that is answered through ctdb_request_control_reply() */
30 struct ctdb_req_control *c;
35 called from a child process to write the data
/*
  Write every record of state->m into the local tdb of state->ctdb_db
  inside a single tdb transaction.

  Visible flow: start a transaction, then for each of the m->count
  marshalled records pull the next record, fetch the copy currently
  stored, compare RSNs and payload, and store the new record; finally
  commit the transaction.  A tdb_transaction_cancel() call follows the
  commit.

  NOTE(review): this listing has dropped lines (the embedded numbering
  skips, e.g. 37 -> 40 and 97 -> 104).  The braces, the declarations of
  "ret" and "i", the conditions guarding each DEBUG() call, the return
  statements and whatever leads into the cancel call are not visible, so
  the exact error paths and return values must be confirmed against the
  complete file.
 */
37 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
40 struct ctdb_rec_data *rec = NULL;
41 struct ctdb_marshall_buffer *m = state->m;
/* all stores below happen between this start and the commit further down */
43 ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
45 DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
46 state->ctdb_db->db_id));
50 for (i=0;i<m->count;i++) {
51 struct ctdb_ltdb_header oldheader;
52 struct ctdb_ltdb_header header;
53 TDB_DATA key, data, olddata;
/* per-record talloc context; passed to ctdb_ltdb_fetch() below.
   NOTE(review): where it is freed is not visible in this listing */
54 TALLOC_CTX *tmp_ctx = talloc_new(state);
/* "rec" starts as NULL and is fed back in on every iteration; presumably
   the helper uses it as the cursor into the buffer - confirm */
56 rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
59 DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
60 i, state->ctdb_db->db_id));
65 /* fetch the old header and ensure the rsn is less than the new rsn */
66 ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
68 DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
69 state->ctdb_db->db_id));
/* this branch is taken only when the stored RSN is not smaller than the
   incoming one AND the payload differs (different size or different
   bytes); an identical payload never triggers it.  memcmp() is reached
   only when both sizes are equal, because of the short-circuit "||" */
74 if (oldheader.rsn >= header.rsn &&
75 (olddata.dsize != data.dsize ||
76 memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
77 DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
78 state->ctdb_db->db_id,
79 (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
/* write the incoming header and data under the record's key */
86 ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
88 DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
89 state->ctdb_db->db_id));
94 ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
96 DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
97 state->ctdb_db->db_id));
/* NOTE(review): the control flow leading here is not visible; presumably
   this cancel is the shared failure path - confirm */
104 tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
110 called when the child has completed the persistent write
/*
  Completion callback that ctdb_control_update_record() hands to
  ctdb_childwrite().

    status:       forwarded unchanged as the status of the control reply
    private_data: the struct ctdb_persistent_write_state of the request

  Replies to the stored control state->c with no reply data and no error
  message.

  NOTE(review): the function braces and anything that follows the reply
  are not visible in this listing (the embedded numbering stops at 119
  and resumes at 125), so whether the state is released here cannot be
  told from these lines.
 */
113 static void ctdb_persistent_write_callback(int status, void *private_data)
115 struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
116 struct ctdb_persistent_write_state);
119 ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
125 called if our lockwait child times out
/*
  Timed-event handler armed by ctdb_control_update_record(), which passes
  the request state as private_data.

  Replies to the pending control state->c with status -1 and the error
  string "timeout in ctdb_persistent_lock".  The ev, te and t arguments
  are not used in the visible lines.

  NOTE(review): the function braces and anything after the reply are not
  visible in this listing (the embedded numbering jumps from 132 to 136).
 */
127 static void ctdb_persistent_lock_timeout(struct event_context *ev, struct timed_event *te,
128 struct timeval t, void *private_data)
130 struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
131 struct ctdb_persistent_write_state);
132 ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
/*
  Book-keeping for one forked writer child; created by ctdb_childwrite()
  and consumed by childwrite_handler().

  NOTE(review): lines are missing from this listing (the embedded
  numbering jumps from 139 to 143 and stops at 144).  Other code in this
  file also uses h->child, h->fd[] and h->private_data, so those members
  and the closing "};" presumably sit on the missing lines - confirm
  against the complete file.
 */
136 struct childwrite_handle {
/* daemon context, used for the statistics counters */
137 struct ctdb_context *ctdb;
/* database the write belongs to */
138 struct ctdb_db_context *ctdb_db;
/* fd event watching the read end of the pipe */
139 struct fd_event *fde;
/* completion callback; childwrite_handler() copies it into a local */
143 void (*callback)(int, void *);
/* set when the handle is set up; used for the childwrite latency statistic */
144 struct timeval start_time;
/*
  talloc destructor for a childwrite_handle.  It is installed in
  ctdb_childwrite() and removed again by childwrite_handler() before the
  handler releases the handle itself.

  Drops the pending_childwrite_calls statistic and sends SIGKILL to the
  child process recorded in h->child.

  NOTE(review): the braces and the return statement are not visible in
  this listing (the embedded numbering jumps from 150 to 154).
 */
147 static int childwrite_destructor(struct childwrite_handle *h)
149 CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
150 kill(h->child, SIGKILL);
/*
  fd event handler that ctdb_childwrite() registers on the read end of
  the pipe; private_data is the childwrite_handle.

  Visible steps, in order:
    - copy private_data, callback and the child pid out of the handle
      into locals and create tmp_ctx as a talloc child of ev
    - record the "persistent" childwrite latency measured from
      h->start_time and drop the pending_childwrite_calls statistic
    - re-parent the handle onto tmp_ctx and remove its destructor, so
      the final talloc_free(tmp_ctx) releases the handle without running
      childwrite_destructor()
    - read one byte from h->fd[0] into c and log an error that reports
      the read() return value
    - SIGKILL the child and free tmp_ctx

  NOTE(review): lines are missing from this listing (the embedded
  numbering skips, e.g. 165 -> 169 and 181 -> 187).  The declarations of
  "ret" and "c", the condition guarding the DEBUG() call, the ends of the
  two block comments and the invocation of the saved callback are not
  visible; presumably the callback is invoked with the byte read from
  the child before the kill - confirm against the complete file.
 */
154 /* called when the child process has finished writing the record to the
157 static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
158 uint16_t flags, void *private_data)
160 struct childwrite_handle *h = talloc_get_type(private_data,
161 struct childwrite_handle);
162 void *p = h->private_data;
163 void (*callback)(int, void *) = h->callback;
164 pid_t child = h->child;
165 TALLOC_CTX *tmp_ctx = talloc_new(ev);
169 CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
170 CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
172 /* the handle needs to go away when the context is gone - when
173 the handle goes away this implicitly closes the pipe, which
175 talloc_steal(tmp_ctx, h);
177 talloc_set_destructor(h, NULL);
179 ret = read(h->fd[0], &c, 1);
181 DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
187 kill(child, SIGKILL);
188 talloc_free(tmp_ctx);
// Fork a child process that runs ctdb_persistent_store(state) and signals
// completion to the parent through a pipe.
//
//   ctdb_db:  database the write belongs to; also supplies the ctdb context
//   callback: stored in the handle and later picked up by
//             childwrite_handler()
//   state:    request state; talloc parent of the handle and the
//             private_data stored next to the callback
//
// Visible flow:
//   - bump the childwrite_calls and pending_childwrite_calls statistics;
//     pending_childwrite_calls is decremented again next to each visible
//     failure check (allocation, pipe, fork, event registration)
//   - allocate a zeroed handle under state, create the pipe, fork through
//     ctdb_fork() and close both pipe ends if the fork fails
//   - child (result->child == 0): close the read end, set debug_extra to
//     "childwrite-<db_name>:", run ctdb_persistent_store(), write the one
//     byte "c" to the write end, then loop for as long as the parent pid
//     still exists
//   - parent: close the write end, mark the read end close-on-exec,
//     install childwrite_destructor(), register childwrite_handler() for
//     readability of the read end and record the start time
//
// NOTE(review): lines are missing from this listing (the embedded
// numbering skips, e.g. 196 -> 198 and 240 -> 244).  The declarations of
// "ret" and "c", the value given to "c" on failure, the body of the wait
// loop, how the child exits, the last argument of event_add_fd() and all
// return statements are not visible.  The caller compares the result with
// NULL, so presumably failures return NULL and success returns the
// handle - confirm against the complete file.
191 /* this creates a child process which will take out a tdb transaction
192 and write the record to the database.
194 struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
195 void (*callback)(int, void *private_data),
196 struct ctdb_persistent_write_state *state)
198 struct childwrite_handle *result;
200 pid_t parent = getpid();
202 CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
203 CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
205 if (!(result = talloc_zero(state, struct childwrite_handle))) {
206 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
210 ret = pipe(result->fd);
214 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
218 result->child = ctdb_fork(ctdb_db->ctdb);
220 if (result->child == (pid_t)-1) {
221 close(result->fd[0]);
222 close(result->fd[1]);
224 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
228 result->callback = callback;
229 result->private_data = state;
230 result->ctdb = ctdb_db->ctdb;
231 result->ctdb_db = ctdb_db;
233 if (result->child == 0) {
236 close(result->fd[0]);
237 debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
238 ret = ctdb_persistent_store(state);
240 DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
244 write(result->fd[1], &c, 1);
246 /* make sure we die when our parent dies */
// kill(pid, 0) delivers no signal and only probes the pid; the loop ends
// once the probe fails with ESRCH, i.e. the parent no longer exists
247 while (kill(parent, 0) == 0 || errno != ESRCH) {
// parent side from here on: only the read end of the pipe is kept
253 close(result->fd[1]);
254 set_close_on_exec(result->fd[0]);
// from now on freeing the handle kills the child (see childwrite_destructor)
256 talloc_set_destructor(result, childwrite_destructor);
258 DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
// the fd event is allocated under the handle, so it goes away with it
260 result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
261 EVENT_FD_READ, childwrite_handler,
263 if (result->fde == NULL) {
265 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
// have tevent close the read end when the fd event is freed
268 tevent_fd_set_auto_close(result->fde);
// reference point for the latency statistic taken in childwrite_handler()
270 result->start_time = timeval_current();
276 update a record on this node if the new record has a higher rsn than the record currently stored
/*
  Control handler for an update-record request.

    ctdb:    daemon context; its recovery_mode and its control_timeout
             tunable are consulted
    c:       the request control; re-parented onto the request state
    recdata: payload, interpreted as a struct ctdb_marshall_buffer

  Visible flow: reject the request (with a log message) while recovery is
  active, when m->db_id names no attached database, or when the database
  has an unhealthy_reason set; otherwise allocate the request state, start
  the writer child through ctdb_childwrite() with
  ctdb_persistent_write_callback() as completion callback, keep the
  control alive by stealing it onto the state, and arm a timer that calls
  ctdb_persistent_lock_timeout() after tunable.control_timeout seconds.

  NOTE(review): lines are missing from this listing (the embedded
  numbering skips, e.g. 280 -> 283 and 308 -> 312).  The last parameter of
  the signature, every return statement, the assignments of state->c and
  state->m (both are read elsewhere in this file), the cleanup after a
  failed ctdb_childwrite() and the statement belonging to the "wait for
  the replies" comment are not visible - confirm against the complete
  file.
 */
279 int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
280 struct ctdb_req_control *c, TDB_DATA recdata,
283 struct ctdb_db_context *ctdb_db;
284 struct ctdb_persistent_write_state *state;
285 struct childwrite_handle *handle;
/* NOTE(review): recdata.dptr is reinterpreted without any visible check of
   recdata.dsize - confirm that the payload size is validated somewhere */
286 struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
288 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
289 DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
293 ctdb_db = find_ctdb_db(ctdb, m->db_id);
294 if (ctdb_db == NULL) {
295 DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
299 if (ctdb_db->unhealthy_reason) {
300 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
301 ctdb_db->db_name, ctdb_db->unhealthy_reason));
/* the state hangs off ctdb; the childwrite handle, the stolen control and
   the timeout event are all allocated under the state */
305 state = talloc(ctdb, struct ctdb_persistent_write_state);
306 CTDB_NO_MEMORY(ctdb, state);
308 state->ctdb_db = ctdb_db;
312 /* create a child process to take out a transaction and
315 handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
316 if (handle == NULL) {
317 DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
322 /* we need to wait for the replies */
325 /* need to keep the control structure around */
326 talloc_steal(state, c);
328 /* but we won't wait forever */
/* the timer is allocated under state and receives state as private_data */
329 event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.control_timeout, 0),
330 ctdb_persistent_lock_timeout, state);