server: move the code for the update_record control to its own file
[samba.git] / ctdb / server / ctdb_update_record.c
1 /* 
2    implementation of the update record control
3
4    Copyright (C) Andrew Tridgell  2007
5    Copyright (C) Ronnie Sahlberg  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22 #include "lib/tevent/tevent.h"
23 #include "db_wrap.h"
24 #include "lib/tdb/include/tdb.h"
25 #include "ctdb_private.h"
26
/*
  state carried through one update_record control: what to write,
  where to write it, and which request to answer once it is done
 */
struct ctdb_persistent_write_state {
        /* database the records are written to */
        struct ctdb_db_context *ctdb_db;
        /* marshalled records to store (walked by ctdb_persistent_store) */
        struct ctdb_marshall_buffer *m;
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
};
32
33
34 /*
35   called from a child process to write the data
36  */
37 static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
38 {
39         int ret, i;
40         struct ctdb_rec_data *rec = NULL;
41         struct ctdb_marshall_buffer *m = state->m;
42
43         ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
44         if (ret == -1) {
45                 DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
46                                  state->ctdb_db->db_id));
47                 return -1;
48         }
49
50         for (i=0;i<m->count;i++) {
51                 struct ctdb_ltdb_header oldheader;
52                 struct ctdb_ltdb_header header;
53                 TDB_DATA key, data, olddata;
54                 TALLOC_CTX *tmp_ctx = talloc_new(state);
55
56                 rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
57                 
58                 if (rec == NULL) {
59                         DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
60                                          i, state->ctdb_db->db_id));
61                         talloc_free(tmp_ctx);
62                         goto failed;                    
63                 }
64
65                 /* fetch the old header and ensure the rsn is less than the new rsn */
66                 ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
67                 if (ret != 0) {
68                         DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
69                                          state->ctdb_db->db_id));
70                         talloc_free(tmp_ctx);
71                         goto failed;
72                 }
73
74                 if (oldheader.rsn >= header.rsn &&
75                     (olddata.dsize != data.dsize || 
76                      memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
77                         DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
78                                           state->ctdb_db->db_id, 
79                                           (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
80                         talloc_free(tmp_ctx);
81                         goto failed;
82                 }
83
84                 talloc_free(tmp_ctx);
85
86                 ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
87                 if (ret != 0) {
88                         DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n", 
89                                           state->ctdb_db->db_id));
90                         goto failed;
91                 }
92         }
93
94         ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
95         if (ret == -1) {
96                 DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
97                                  state->ctdb_db->db_id));
98                 return -1;
99         }
100
101         return 0;
102         
103 failed:
104         tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
105         return -1;
106 }
107
108
109 /*
110   called when we the child has completed the persistent write
111   on our behalf
112  */
113 static void ctdb_persistent_write_callback(int status, void *private_data)
114 {
115         struct ctdb_persistent_write_state *state = talloc_get_type(private_data, 
116                                                                    struct ctdb_persistent_write_state);
117
118
119         ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
120
121         talloc_free(state);
122 }
123
124 /*
125   called if our lockwait child times out
126  */
127 static void ctdb_persistent_lock_timeout(struct event_context *ev, struct timed_event *te, 
128                                          struct timeval t, void *private_data)
129 {
130         struct ctdb_persistent_write_state *state = talloc_get_type(private_data, 
131                                                                    struct ctdb_persistent_write_state);
132         ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
133         talloc_free(state);
134 }
135
/*
  bookkeeping for one forked child that performs a persistent write
 */
struct childwrite_handle {
        /* daemon context, used for the childwrite statistics */
        struct ctdb_context *ctdb;
        /* database the child is writing to */
        struct ctdb_db_context *ctdb_db;
        /* fd event watching the read end of the pipe */
        struct fd_event *fde;
        /* pipe to the child: the parent reads the status byte from fd[0],
           the child writes it to fd[1] */
        int fd[2];
        /* pid of the forked writer */
        pid_t child;
        /* opaque pointer handed back to the callback */
        void *private_data;
        /* called as callback(status, private_data) when the child reports */
        void (*callback)(int, void *);
        /* when the handle was set up, used for the latency statistics */
        struct timeval start_time;
};
146
147 static int childwrite_destructor(struct childwrite_handle *h)
148 {
149         CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
150         kill(h->child, SIGKILL);
151         return 0;
152 }
153
154 /* called when the child process has finished writing the record to the
155    database
156 */
157 static void childwrite_handler(struct event_context *ev, struct fd_event *fde, 
158                              uint16_t flags, void *private_data)
159 {
160         struct childwrite_handle *h = talloc_get_type(private_data, 
161                                                      struct childwrite_handle);
162         void *p = h->private_data;
163         void (*callback)(int, void *) = h->callback;
164         pid_t child = h->child;
165         TALLOC_CTX *tmp_ctx = talloc_new(ev);
166         int ret;
167         char c;
168
169         CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
170         CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
171
172         /* the handle needs to go away when the context is gone - when
173            the handle goes away this implicitly closes the pipe, which
174            kills the child */
175         talloc_steal(tmp_ctx, h);
176
177         talloc_set_destructor(h, NULL);
178
179         ret = read(h->fd[0], &c, 1);
180         if (ret < 1) {
181                 DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
182                 c = 1;
183         }
184
185         callback(c, p);
186
187         kill(child, SIGKILL);
188         talloc_free(tmp_ctx);
189 }
190
191 /* this creates a child process which will take out a tdb transaction
192    and write the record to the database.
193 */
194 struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
195                                 void (*callback)(int, void *private_data),
196                                 struct ctdb_persistent_write_state *state)
197 {
198         struct childwrite_handle *result;
199         int ret;
200         pid_t parent = getpid();
201
202         CTDB_INCREMENT_STAT(ctdb_db->ctdb, childwrite_calls);
203         CTDB_INCREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
204
205         if (!(result = talloc_zero(state, struct childwrite_handle))) {
206                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
207                 return NULL;
208         }
209
210         ret = pipe(result->fd);
211
212         if (ret != 0) {
213                 talloc_free(result);
214                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
215                 return NULL;
216         }
217
218         result->child = ctdb_fork(ctdb_db->ctdb);
219
220         if (result->child == (pid_t)-1) {
221                 close(result->fd[0]);
222                 close(result->fd[1]);
223                 talloc_free(result);
224                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
225                 return NULL;
226         }
227
228         result->callback = callback;
229         result->private_data = state;
230         result->ctdb = ctdb_db->ctdb;
231         result->ctdb_db = ctdb_db;
232
233         if (result->child == 0) {
234                 char c = 0;
235
236                 close(result->fd[0]);
237                 debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
238                 ret = ctdb_persistent_store(state);
239                 if (ret != 0) {
240                         DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
241                         c = 1;
242                 }
243
244                 write(result->fd[1], &c, 1);
245
246                 /* make sure we die when our parent dies */
247                 while (kill(parent, 0) == 0 || errno != ESRCH) {
248                         sleep(5);
249                 }
250                 _exit(0);
251         }
252
253         close(result->fd[1]);
254         set_close_on_exec(result->fd[0]);
255
256         talloc_set_destructor(result, childwrite_destructor);
257
258         DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d for ctdb_childwrite\n", result->fd[0]));
259
260         result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
261                                    EVENT_FD_READ, childwrite_handler,
262                                    (void *)result);
263         if (result->fde == NULL) {
264                 talloc_free(result);
265                 CTDB_DECREMENT_STAT(ctdb_db->ctdb, pending_childwrite_calls);
266                 return NULL;
267         }
268         tevent_fd_set_auto_close(result->fde);
269
270         result->start_time = timeval_current();
271
272         return result;
273 }
274
275 /* 
276    update a record on this node if the new record has a higher rsn than the
277    current record
278  */
279 int32_t ctdb_control_update_record(struct ctdb_context *ctdb, 
280                                    struct ctdb_req_control *c, TDB_DATA recdata, 
281                                    bool *async_reply)
282 {
283         struct ctdb_db_context *ctdb_db;
284         struct ctdb_persistent_write_state *state;
285         struct childwrite_handle *handle;
286         struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
287
288         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
289                 DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
290                 return -1;
291         }
292
293         ctdb_db = find_ctdb_db(ctdb, m->db_id);
294         if (ctdb_db == NULL) {
295                 DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
296                 return -1;
297         }
298
299         if (ctdb_db->unhealthy_reason) {
300                 DEBUG(DEBUG_ERR,("db(%s) unhealty in ctdb_control_update_record: %s\n",
301                                  ctdb_db->db_name, ctdb_db->unhealthy_reason));
302                 return -1;
303         }
304
305         state = talloc(ctdb, struct ctdb_persistent_write_state);
306         CTDB_NO_MEMORY(ctdb, state);
307
308         state->ctdb_db = ctdb_db;
309         state->c       = c;
310         state->m       = m;
311
312         /* create a child process to take out a transaction and 
313            write the data.
314         */
315         handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
316         if (handle == NULL) {
317                 DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
318                 talloc_free(state);
319                 return -1;
320         }
321
322         /* we need to wait for the replies */
323         *async_reply = true;
324
325         /* need to keep the control structure around */
326         talloc_steal(state, c);
327
328         /* but we won't wait forever */
329         event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.control_timeout, 0),
330                         ctdb_persistent_lock_timeout, state);
331
332         return 0;
333 }
334