e001d9cb768af5fe64ac2449e7ce550fa75a6f85
[vlendec/samba-autobuild/.git] / source3 / lib / tdb_validate.c
1 /*
2  * Unix SMB/CIFS implementation.
3  *
4  * A general tdb content validation mechanism
5  *
6  * Copyright (C) Michael Adam      2007
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  */
21
22 #include "tdb_validate.h"
23 #include "includes.h"
24
25 /*
26  * internal validation function, executed by the child.
27  */
28 static int tdb_validate_child(struct tdb_context *tdb,
29                               tdb_validate_data_func validate_fn)
30 {
31         int ret = 1;
32         int num_entries = 0;
33         struct tdb_validation_status v_status;
34
35         v_status.tdb_error = False;
36         v_status.bad_freelist = False;
37         v_status.bad_entry = False;
38         v_status.unknown_key = False;
39         v_status.success = True;
40
41         if (!tdb) {
42                 v_status.tdb_error = True;
43                 v_status.success = False;
44                 goto out;
45         }
46
47         /*
48          * we can simplify this by passing a check function,
49          * but I don't want to change all the callers...
50          */
51         ret = tdb_check(tdb, NULL, NULL);
52         if (ret == -1) {
53                 v_status.tdb_error = True;
54                 v_status.success = False;
55                 goto out;
56         }
57
58         /* Check if the tdb's freelist is good. */
59         if (tdb_validate_freelist(tdb, &num_entries) == -1) {
60                 v_status.bad_freelist = True;
61                 v_status.success = False;
62                 goto out;
63         }
64
65         DEBUG(10,("tdb_validate_child: tdb %s freelist has %d entries\n",
66                   tdb_name(tdb), num_entries));
67
68         /* Now traverse the tdb to validate it. */
69         num_entries = tdb_traverse(tdb, validate_fn, (void *)&v_status);
70         if (!v_status.success) {
71                 goto out;
72         } else if (num_entries == -1) {
73                 v_status.tdb_error = True;
74                 v_status.success = False;
75                 goto out;
76         }
77
78         DEBUG(10,("tdb_validate_child: tdb %s is good with %d entries\n",
79                   tdb_name(tdb), num_entries));
80         ret = 0; /* Cache is good. */
81
82 out:
83         DEBUG(10,   ("tdb_validate_child: summary of validation status:\n"));
84         DEBUGADD(10,(" * tdb error: %s\n", v_status.tdb_error ? "yes" : "no"));
85         DEBUGADD(10,(" * bad freelist: %s\n",v_status.bad_freelist?"yes":"no"));
86         DEBUGADD(10,(" * bad entry: %s\n", v_status.bad_entry ? "yes" : "no"));
87         DEBUGADD(10,(" * unknown key: %s\n", v_status.unknown_key?"yes":"no"));
88         DEBUGADD(10,(" => overall success: %s\n", v_status.success?"yes":"no"));
89
90         return ret;
91 }
92
93 /*
94  * tdb validation function.
95  * returns 0 if tdb is ok, != 0 if it isn't.
96  * this function expects an opened tdb.
97  */
98 int tdb_validate(struct tdb_context *tdb, tdb_validate_data_func validate_fn)
99 {
100         pid_t child_pid = -1;
101         int child_status = 0;
102         int wait_pid = 0;
103         int ret = 1;
104
105         if (tdb == NULL) {
106                 DEBUG(1, ("Error: tdb_validate called with tdb == NULL\n"));
107                 return ret;
108         }
109
110         DEBUG(5, ("tdb_validate called for tdb '%s'\n", tdb_name(tdb)));
111
112         /* fork and let the child do the validation.
113          * benefit: no need to twist signal handlers and panic functions.
114          * just let the child panic. we catch the signal. */
115
116         DEBUG(10, ("tdb_validate: forking to let child do validation.\n"));
117         child_pid = sys_fork();
118         if (child_pid == 0) {
119                 /* child code */
120                 DEBUG(10, ("tdb_validate (validation child): created\n"));
121                 DEBUG(10, ("tdb_validate (validation child): "
122                            "calling tdb_validate_child\n"));
123                 exit(tdb_validate_child(tdb, validate_fn));
124         }
125         else if (child_pid < 0) {
126                 DEBUG(1, ("tdb_validate: fork for validation failed.\n"));
127                 goto done;
128         }
129
130         /* parent */
131
132         DEBUG(10, ("tdb_validate: fork succeeded, child PID = %u\n",
133                 (unsigned int)child_pid));
134
135         DEBUG(10, ("tdb_validate: waiting for child to finish...\n"));
136         while  ((wait_pid = sys_waitpid(child_pid, &child_status, 0)) < 0) {
137                 if (errno == EINTR) {
138                         DEBUG(10, ("tdb_validate: got signal during waitpid, "
139                                    "retrying\n"));
140                         errno = 0;
141                         continue;
142                 }
143                 DEBUG(1, ("tdb_validate: waitpid failed with error '%s'.\n",
144                           strerror(errno)));
145                 goto done;
146         }
147         if (wait_pid != child_pid) {
148                 DEBUG(1, ("tdb_validate: waitpid returned pid %d, "
149                           "but %u was expected\n", wait_pid, (unsigned int)child_pid));
150                 goto done;
151         }
152
153         DEBUG(10, ("tdb_validate: validating child returned.\n"));
154         if (WIFEXITED(child_status)) {
155                 DEBUG(10, ("tdb_validate: child exited, code %d.\n",
156                            WEXITSTATUS(child_status)));
157                 ret = WEXITSTATUS(child_status);
158         }
159         if (WIFSIGNALED(child_status)) {
160                 DEBUG(10, ("tdb_validate: child terminated by signal %d\n",
161                            WTERMSIG(child_status)));
162 #ifdef WCOREDUMP
163                 if (WCOREDUMP(child_status)) {
164                         DEBUGADD(10, ("core dumped\n"));
165                 }
166 #endif
167                 ret = WTERMSIG(child_status);
168         }
169         if (WIFSTOPPED(child_status)) {
170                 DEBUG(10, ("tdb_validate: child was stopped by signal %d\n",
171                            WSTOPSIG(child_status)));
172                 ret = WSTOPSIG(child_status);
173         }
174
175 done:
176         DEBUG(5, ("tdb_validate returning code '%d' for tdb '%s'\n", ret,
177                   tdb_name(tdb)));
178
179         return ret;
180 }
181
182 /*
183  * tdb validation function.
184  * returns 0 if tdb is ok, != 0 if it isn't.
185  * this is a wrapper around the actual validation function that opens and closes
186  * the tdb.
187  */
188 int tdb_validate_open(const char *tdb_path, tdb_validate_data_func validate_fn)
189 {
190         TDB_CONTEXT *tdb = NULL;
191         int ret = 1;
192
193         DEBUG(5, ("tdb_validate_open called for tdb '%s'\n", tdb_path));
194
195         tdb = tdb_open_log(tdb_path, 0, TDB_DEFAULT, O_RDWR, 0);
196         if (!tdb) {
197                 DEBUG(1, ("Error opening tdb %s\n", tdb_path));
198                 return ret;
199         }
200
201         ret = tdb_validate(tdb, validate_fn);
202         tdb_close(tdb);
203         return ret;
204 }
205
206 /*
207  * tdb backup function and helpers for tdb_validate wrapper with backup
208  * handling.
209  */
210
/*
 * Per-copy state handed to the traverse callback as private data.
 * Carrying it this way avoids a global "overall status" variable for
 * the traverse-copy.
 */
struct tdb_copy_data {
        /* tdb that the traversed records are stored into */
        struct tdb_context *dst;
        /* cleared by the callback as soon as one store fails */
        bool success;
};
217
218 static int traverse_copy_fn(struct tdb_context *tdb, TDB_DATA key,
219                             TDB_DATA dbuf, void *private_data)
220 {
221         struct tdb_copy_data *data = (struct tdb_copy_data *)private_data;
222
223         if (tdb_store(data->dst, key, dbuf, TDB_INSERT) != 0) {
224                 DEBUG(4, ("Failed to insert into %s: %s\n", tdb_name(data->dst),
225                           strerror(errno)));
226                 data->success = False;
227                 return 1;
228         }
229         return 0;
230 }
231
232 static int tdb_copy(struct tdb_context *src, struct tdb_context *dst)
233 {
234         struct tdb_copy_data data;
235         int count;
236
237         data.dst = dst;
238         data.success = True;
239
240         count = tdb_traverse(src, traverse_copy_fn, (void *)(&data));
241         if ((count < 0) || (data.success == False)) {
242                 return -1;
243         }
244         return count;
245 }
246
/*
 * Basic verification: traverse the whole tdb without a callback and
 * hand back whatever tdb_traverse() returns. tdb_backup() compares
 * this value with the count obtained while copying.
 */
static int tdb_verify_basic(struct tdb_context *tdb)
{
        int visited = tdb_traverse(tdb, NULL, NULL);

        return visited;
}
251
252 /* this backup function is essentially taken from lib/tdb/tools/tdbbackup.tdb
253  */
254 static int tdb_backup(TALLOC_CTX *ctx, const char *src_path,
255                       const char *dst_path, int hash_size)
256 {
257         struct tdb_context *src_tdb = NULL;
258         struct tdb_context *dst_tdb = NULL;
259         char *tmp_path = NULL;
260         struct stat st;
261         int count1, count2;
262         int saved_errno = 0;
263         int ret = -1;
264
265         if (stat(src_path, &st) != 0) {
266                 DEBUG(3, ("Could not stat '%s': %s\n", src_path,
267                           strerror(errno)));
268                 goto done;
269         }
270
271         /* open old tdb RDWR - so we can lock it */
272         src_tdb = tdb_open_log(src_path, 0, TDB_DEFAULT, O_RDWR, 0);
273         if (src_tdb == NULL) {
274                 DEBUG(3, ("Failed to open tdb '%s'\n", src_path));
275                 goto done;
276         }
277
278         if (tdb_lockall(src_tdb) != 0) {
279                 DEBUG(3, ("Failed to lock tdb '%s'\n", src_path));
280                 goto done;
281         }
282
283         tmp_path = talloc_asprintf(ctx, "%s%s", dst_path, ".tmp");
284         if (!tmp_path) {
285                 DEBUG(3, ("talloc fail\n"));
286                 goto done;
287         }
288
289         unlink(tmp_path);
290         dst_tdb = tdb_open_log(tmp_path,
291                                hash_size ? hash_size : tdb_hash_size(src_tdb),
292                                TDB_DEFAULT, O_RDWR | O_CREAT | O_EXCL,
293                                st.st_mode & 0777);
294         if (dst_tdb == NULL) {
295                 DEBUG(3, ("Error creating tdb '%s': %s\n", tmp_path,
296                           strerror(errno)));
297                 saved_errno = errno;
298                 unlink(tmp_path);
299                 goto done;
300         }
301
302         count1 = tdb_copy(src_tdb, dst_tdb);
303         if (count1 < 0) {
304                 DEBUG(3, ("Failed to copy tdb '%s': %s\n", src_path,
305                           strerror(errno)));
306                 tdb_close(dst_tdb);
307                 goto done;
308         }
309
310         /* reopen ro and do basic verification */
311         tdb_close(dst_tdb);
312         dst_tdb = tdb_open_log(tmp_path, 0, TDB_DEFAULT, O_RDONLY, 0);
313         if (!dst_tdb) {
314                 DEBUG(3, ("Failed to reopen tdb '%s': %s\n", tmp_path,
315                           strerror(errno)));
316                 goto done;
317         }
318         count2 = tdb_verify_basic(dst_tdb);
319         if (count2 != count1) {
320                 DEBUG(3, ("Failed to verify result of copying tdb '%s'.\n",
321                           src_path));
322                 tdb_close(dst_tdb);
323                 goto done;
324         }
325
326         DEBUG(10, ("tdb_backup: successfully copied %d entries\n", count1));
327
328         /* make sure the new tdb has reached stable storage
329          * then rename it to its destination */
330         fsync(tdb_fd(dst_tdb));
331         tdb_close(dst_tdb);
332         unlink(dst_path);
333         if (rename(tmp_path, dst_path) != 0) {
334                 DEBUG(3, ("Failed to rename '%s' to '%s': %s\n",
335                           tmp_path, dst_path, strerror(errno)));
336                 goto done;
337         }
338
339         /* success */
340         ret = 0;
341
342 done:
343         if (src_tdb != NULL) {
344                 tdb_close(src_tdb);
345         }
346         if (tmp_path != NULL) {
347                 unlink(tmp_path);
348                 TALLOC_FREE(tmp_path);
349         }
350         if (saved_errno != 0) {
351                 errno = saved_errno;
352         }
353         return ret;
354 }
355
356 static int rename_file_with_suffix(TALLOC_CTX *ctx, const char *path,
357                                    const char *suffix)
358 {
359         int ret = -1;
360         char *dst_path;
361
362         dst_path = talloc_asprintf(ctx, "%s%s", path, suffix);
363         if (dst_path == NULL) {
364                 DEBUG(3, ("error out of memory\n"));
365                 return ret;
366         }
367
368         ret = (rename(path, dst_path) != 0);
369
370         if (ret == 0) {
371                 DEBUG(5, ("moved '%s' to '%s'\n", path, dst_path));
372         } else if (errno == ENOENT) {
373                 DEBUG(3, ("file '%s' does not exist - so not moved\n", path));
374                 ret = 0;
375         } else {
376                 DEBUG(3, ("error renaming %s to %s: %s\n", path, dst_path,
377                           strerror(errno)));
378         }
379
380         TALLOC_FREE(dst_path);
381         return ret;
382 }
383
384 /*
385  * do a backup of a tdb, moving the destination out of the way first
386  */
387 static int tdb_backup_with_rotate(TALLOC_CTX *ctx, const char *src_path,
388                                   const char *dst_path, int hash_size,
389                                   const char *rotate_suffix,
390                                   bool retry_norotate_if_nospc,
391                                   bool rename_as_last_resort_if_nospc)
392 {
393         int ret;
394
395         rename_file_with_suffix(ctx, dst_path, rotate_suffix);
396
397         ret = tdb_backup(ctx, src_path, dst_path, hash_size);
398
399         if (ret != 0) {
400                 DEBUG(10, ("backup of %s failed: %s\n", src_path, strerror(errno)));
401         }
402         if ((ret != 0) && (errno == ENOSPC) && retry_norotate_if_nospc)
403         {
404                 char *rotate_path = talloc_asprintf(ctx, "%s%s", dst_path,
405                                                     rotate_suffix);
406                 if (rotate_path == NULL) {
407                         DEBUG(10, ("talloc fail\n"));
408                         return -1;
409                 }
410                 DEBUG(10, ("backup of %s failed due to lack of space\n",
411                            src_path));
412                 DEBUGADD(10, ("trying to free some space by removing rotated "
413                               "dst %s\n", rotate_path));
414                 if (unlink(rotate_path) == -1) {
415                         DEBUG(10, ("unlink of %s failed: %s\n", rotate_path,
416                                    strerror(errno)));
417                 } else {
418                         ret = tdb_backup(ctx, src_path, dst_path, hash_size);
419                 }
420                 TALLOC_FREE(rotate_path);
421         }
422
423         if ((ret != 0) && (errno == ENOSPC) && rename_as_last_resort_if_nospc)
424         {
425                 DEBUG(10, ("backup of %s failed due to lack of space\n", 
426                            src_path));
427                 DEBUGADD(10, ("using 'rename' as a last resort\n"));
428                 ret = rename(src_path, dst_path);
429         }
430
431         return ret;
432 }
433
434 /*
435  * validation function with backup handling:
436  *
437  *  - calls tdb_validate
438  *  - if the tdb is ok, create a backup "name.bak", possibly moving
439  *    existing backup to name.bak.old,
440  *    return 0 (success) even if the backup fails
441  *  - if the tdb is corrupt:
442  *    - move the tdb to "name.corrupt"
443  *    - check if there is valid backup.
444  *      if so, restore the backup.
445  *      if restore is successful, return 0 (success),
446  *    - otherwise return -1 (failure)
447  */
448 int tdb_validate_and_backup(const char *tdb_path,
449                             tdb_validate_data_func validate_fn)
450 {
451         int ret = -1;
452         const char *backup_suffix = ".bak";
453         const char *corrupt_suffix = ".corrupt";
454         const char *rotate_suffix = ".old";
455         char *tdb_path_backup;
456         struct stat st;
457         TALLOC_CTX *ctx = NULL;
458
459         ctx = talloc_new(NULL);
460         if (ctx == NULL) {
461                 DEBUG(0, ("tdb_validate_and_backup: out of memory\n"));
462                 goto done;
463         }
464
465         tdb_path_backup = talloc_asprintf(ctx, "%s%s", tdb_path, backup_suffix);
466         if (!tdb_path_backup) {
467                 DEBUG(0, ("tdb_validate_and_backup: out of memory\n"));
468                 goto done;
469         }
470
471         ret = tdb_validate_open(tdb_path, validate_fn);
472
473         if (ret == 0) {
474                 DEBUG(1, ("tdb '%s' is valid\n", tdb_path));
475                 ret = tdb_backup_with_rotate(ctx, tdb_path, tdb_path_backup, 0,
476                                              rotate_suffix, True, False);
477                 if (ret != 0) {
478                         DEBUG(1, ("Error creating backup of tdb '%s'\n",
479                                   tdb_path));
480                         /* the actual validation was successful: */
481                         ret = 0;
482                 } else {
483                         DEBUG(1, ("Created backup '%s' of tdb '%s'\n",
484                                   tdb_path_backup, tdb_path));
485                 }
486         } else {
487                 DEBUG(1, ("tdb '%s' is invalid\n", tdb_path));
488
489                 ret =stat(tdb_path_backup, &st);
490                 if (ret != 0) {
491                         DEBUG(5, ("Could not stat '%s': %s\n", tdb_path_backup,
492                                   strerror(errno)));
493                         DEBUG(1, ("No backup found.\n"));
494                 } else {
495                         DEBUG(1, ("backup '%s' found.\n", tdb_path_backup));
496                         ret = tdb_validate_open(tdb_path_backup, validate_fn);
497                         if (ret != 0) {
498                                 DEBUG(1, ("Backup '%s' is invalid.\n",
499                                           tdb_path_backup));
500                         }
501                 }
502
503                 if (ret != 0) {
504                         int renamed = rename_file_with_suffix(ctx, tdb_path,
505                                                               corrupt_suffix);
506                         if (renamed != 0) {
507                                 DEBUG(1, ("Error moving tdb to '%s%s'\n",
508                                           tdb_path, corrupt_suffix));
509                         } else {
510                                 DEBUG(1, ("Corrupt tdb stored as '%s%s'\n",
511                                           tdb_path, corrupt_suffix));
512                         }
513                         goto done;
514                 }
515
516                 DEBUG(1, ("valid backup '%s' found\n", tdb_path_backup));
517                 ret = tdb_backup_with_rotate(ctx, tdb_path_backup, tdb_path, 0,
518                                              corrupt_suffix, True, True);
519                 if (ret != 0) {
520                         DEBUG(1, ("Error restoring backup from '%s'\n",
521                                   tdb_path_backup));
522                 } else {
523                         DEBUG(1, ("Restored tdb backup from '%s'\n",
524                                   tdb_path_backup));
525                 }
526         }
527
528 done:
529         TALLOC_FREE(ctx);
530         return ret;
531 }