2592402afd8b7b3e13eaabe41f661c2733c99f6c
[metze/samba/wip.git] / source3 / lib / tdb_validate.c
1 /*
2  * Unix SMB/CIFS implementation.
3  *
4  * A general tdb content validation mechanism
5  *
6  * Copyright (C) Michael Adam      2007
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  */
21
22 #include "includes.h"
23 #include "system/filesys.h"
24 #include "util_tdb.h"
25 #include "tdb_validate.h"
26
27 /*
28  * internal validation function, executed by the child.
29  */
30 static int tdb_validate_child(struct tdb_context *tdb,
31                               tdb_validate_data_func validate_fn)
32 {
33         int ret = 1;
34         int num_entries = 0;
35         struct tdb_validation_status v_status;
36
37         v_status.tdb_error = False;
38         v_status.bad_freelist = False;
39         v_status.bad_entry = False;
40         v_status.unknown_key = False;
41         v_status.success = True;
42
43         if (!tdb) {
44                 v_status.tdb_error = True;
45                 v_status.success = False;
46                 goto out;
47         }
48
49         /*
50          * we can simplify this by passing a check function,
51          * but I don't want to change all the callers...
52          */
53         ret = tdb_check(tdb, NULL, NULL);
54         if (ret != 0) {
55                 v_status.tdb_error = True;
56                 v_status.success = False;
57                 goto out;
58         }
59
60         /* Check if the tdb's freelist is good. */
61         if (tdb_validate_freelist(tdb, &num_entries) == -1) {
62                 v_status.bad_freelist = True;
63                 v_status.success = False;
64                 goto out;
65         }
66
67         DEBUG(10,("tdb_validate_child: tdb %s freelist has %d entries\n",
68                   tdb_name(tdb), num_entries));
69
70         /* Now traverse the tdb to validate it. */
71         num_entries = tdb_traverse(tdb, validate_fn, (void *)&v_status);
72         if (!v_status.success) {
73                 goto out;
74         } else if (num_entries < 0) {
75                 v_status.tdb_error = True;
76                 v_status.success = False;
77                 goto out;
78         }
79
80         DEBUG(10,("tdb_validate_child: tdb %s is good with %d entries\n",
81                   tdb_name(tdb), num_entries));
82         ret = 0; /* Cache is good. */
83
84 out:
85         DEBUG(10,   ("tdb_validate_child: summary of validation status:\n"));
86         DEBUGADD(10,(" * tdb error: %s\n", v_status.tdb_error ? "yes" : "no"));
87         DEBUGADD(10,(" * bad freelist: %s\n",v_status.bad_freelist?"yes":"no"));
88         DEBUGADD(10,(" * bad entry: %s\n", v_status.bad_entry ? "yes" : "no"));
89         DEBUGADD(10,(" * unknown key: %s\n", v_status.unknown_key?"yes":"no"));
90         DEBUGADD(10,(" => overall success: %s\n", v_status.success?"yes":"no"));
91
92         return ret;
93 }
94
95 /*
96  * tdb validation function.
97  * returns 0 if tdb is ok, != 0 if it isn't.
98  * this function expects an opened tdb.
99  */
100 int tdb_validate(struct tdb_context *tdb, tdb_validate_data_func validate_fn)
101 {
102         pid_t child_pid = -1;
103         int child_status = 0;
104         int wait_pid = 0;
105         int ret = 1;
106
107         if (tdb == NULL) {
108                 DEBUG(1, ("Error: tdb_validate called with tdb == NULL\n"));
109                 return ret;
110         }
111
112         DEBUG(5, ("tdb_validate called for tdb '%s'\n", tdb_name(tdb)));
113
114         /* fork and let the child do the validation.
115          * benefit: no need to twist signal handlers and panic functions.
116          * just let the child panic. we catch the signal. */
117
118         DEBUG(10, ("tdb_validate: forking to let child do validation.\n"));
119         child_pid = fork();
120         if (child_pid == 0) {
121                 /* child code */
122                 DEBUG(10, ("tdb_validate (validation child): created\n"));
123                 DEBUG(10, ("tdb_validate (validation child): "
124                            "calling tdb_validate_child\n"));
125                 exit(tdb_validate_child(tdb, validate_fn));
126         }
127         else if (child_pid < 0) {
128                 DEBUG(1, ("tdb_validate: fork for validation failed.\n"));
129                 goto done;
130         }
131
132         /* parent */
133
134         DEBUG(10, ("tdb_validate: fork succeeded, child PID = %u\n",
135                 (unsigned int)child_pid));
136
137         DEBUG(10, ("tdb_validate: waiting for child to finish...\n"));
138         while  ((wait_pid = sys_waitpid(child_pid, &child_status, 0)) < 0) {
139                 if (errno == EINTR) {
140                         DEBUG(10, ("tdb_validate: got signal during waitpid, "
141                                    "retrying\n"));
142                         errno = 0;
143                         continue;
144                 }
145                 DEBUG(1, ("tdb_validate: waitpid failed with error '%s'.\n",
146                           strerror(errno)));
147                 goto done;
148         }
149         if (wait_pid != child_pid) {
150                 DEBUG(1, ("tdb_validate: waitpid returned pid %d, "
151                           "but %u was expected\n", wait_pid, (unsigned int)child_pid));
152                 goto done;
153         }
154
155         DEBUG(10, ("tdb_validate: validating child returned.\n"));
156         if (WIFEXITED(child_status)) {
157                 DEBUG(10, ("tdb_validate: child exited, code %d.\n",
158                            WEXITSTATUS(child_status)));
159                 ret = WEXITSTATUS(child_status);
160         }
161         if (WIFSIGNALED(child_status)) {
162                 DEBUG(10, ("tdb_validate: child terminated by signal %d\n",
163                            WTERMSIG(child_status)));
164 #ifdef WCOREDUMP
165                 if (WCOREDUMP(child_status)) {
166                         DEBUGADD(10, ("core dumped\n"));
167                 }
168 #endif
169                 ret = WTERMSIG(child_status);
170         }
171         if (WIFSTOPPED(child_status)) {
172                 DEBUG(10, ("tdb_validate: child was stopped by signal %d\n",
173                            WSTOPSIG(child_status)));
174                 ret = WSTOPSIG(child_status);
175         }
176
177 done:
178         DEBUG(5, ("tdb_validate returning code '%d' for tdb '%s'\n", ret,
179                   tdb_name(tdb)));
180
181         return ret;
182 }
183
184 /*
185  * tdb validation function.
186  * returns 0 if tdb is ok, != 0 if it isn't.
187  * this is a wrapper around the actual validation function that opens and closes
188  * the tdb.
189  */
190 int tdb_validate_open(const char *tdb_path, tdb_validate_data_func validate_fn)
191 {
192         TDB_CONTEXT *tdb = NULL;
193         int ret = 1;
194
195         DEBUG(5, ("tdb_validate_open called for tdb '%s'\n", tdb_path));
196
197         tdb = tdb_open_log(tdb_path, 0, TDB_DEFAULT, O_RDWR, 0);
198         if (!tdb) {
199                 DEBUG(1, ("Error opening tdb %s\n", tdb_path));
200                 return ret;
201         }
202
203         ret = tdb_validate(tdb, validate_fn);
204         tdb_close(tdb);
205         return ret;
206 }
207
208 /*
209  * tdb backup function and helpers for tdb_validate wrapper with backup
210  * handling.
211  */
212
/*
 * State handed to traverse_copy_fn() through the traverse's private data
 * pointer, so that the traverse-copy needs no global status variable.
 */
struct tdb_copy_data {
	struct tdb_context *dst;	/* tdb the records are stored into */
	bool success;			/* cleared when a store fails */
};
219
220 static int traverse_copy_fn(struct tdb_context *tdb, TDB_DATA key,
221                             TDB_DATA dbuf, void *private_data)
222 {
223         struct tdb_copy_data *data = (struct tdb_copy_data *)private_data;
224
225         if (tdb_store(data->dst, key, dbuf, TDB_INSERT) != 0) {
226                 DEBUG(4, ("Failed to insert into %s: %s\n", tdb_name(data->dst),
227                           strerror(errno)));
228                 data->success = False;
229                 return 1;
230         }
231         return 0;
232 }
233
234 static int tdb_copy(struct tdb_context *src, struct tdb_context *dst)
235 {
236         struct tdb_copy_data data;
237         int count;
238
239         data.dst = dst;
240         data.success = True;
241
242         count = tdb_traverse(src, traverse_copy_fn, (void *)(&data));
243         if ((count < 0) || (data.success == False)) {
244                 return -1;
245         }
246         return count;
247 }
248
/*
 * Minimal sanity check: walk the whole tdb without a callback and hand
 * back whatever tdb_traverse() reports.
 */
static int tdb_verify_basic(struct tdb_context *tdb)
{
	int visited = tdb_traverse(tdb, NULL, NULL);

	return visited;
}
253
254 /* this backup function is essentially taken from lib/tdb/tools/tdbbackup.tdb
255  */
256 static int tdb_backup(TALLOC_CTX *ctx, const char *src_path,
257                       const char *dst_path, int hash_size)
258 {
259         struct tdb_context *src_tdb = NULL;
260         struct tdb_context *dst_tdb = NULL;
261         char *tmp_path = NULL;
262         struct stat st;
263         int count1, count2;
264         int saved_errno = 0;
265         int ret = -1;
266
267         if (stat(src_path, &st) != 0) {
268                 DEBUG(3, ("Could not stat '%s': %s\n", src_path,
269                           strerror(errno)));
270                 goto done;
271         }
272
273         /* open old tdb RDWR - so we can lock it */
274         src_tdb = tdb_open_log(src_path, 0, TDB_DEFAULT, O_RDWR, 0);
275         if (src_tdb == NULL) {
276                 DEBUG(3, ("Failed to open tdb '%s'\n", src_path));
277                 goto done;
278         }
279
280         if (tdb_lockall(src_tdb) != 0) {
281                 DEBUG(3, ("Failed to lock tdb '%s'\n", src_path));
282                 goto done;
283         }
284
285         tmp_path = talloc_asprintf(ctx, "%s%s", dst_path, ".tmp");
286         if (!tmp_path) {
287                 DEBUG(3, ("talloc fail\n"));
288                 goto done;
289         }
290
291         unlink(tmp_path);
292
293         if (!hash_size) {
294                 hash_size = tdb_hash_size(src_tdb);
295         }
296
297         dst_tdb = tdb_open_log(tmp_path, hash_size,
298                                TDB_DEFAULT, O_RDWR | O_CREAT | O_EXCL,
299                                st.st_mode & 0777);
300         if (dst_tdb == NULL) {
301                 DEBUG(3, ("Error creating tdb '%s': %s\n", tmp_path,
302                           strerror(errno)));
303                 saved_errno = errno;
304                 unlink(tmp_path);
305                 goto done;
306         }
307
308         count1 = tdb_copy(src_tdb, dst_tdb);
309         if (count1 < 0) {
310                 DEBUG(3, ("Failed to copy tdb '%s': %s\n", src_path,
311                           strerror(errno)));
312                 tdb_close(dst_tdb);
313                 goto done;
314         }
315
316         /* reopen ro and do basic verification */
317         tdb_close(dst_tdb);
318         dst_tdb = tdb_open_log(tmp_path, 0, TDB_DEFAULT, O_RDONLY, 0);
319         if (!dst_tdb) {
320                 DEBUG(3, ("Failed to reopen tdb '%s': %s\n", tmp_path,
321                           strerror(errno)));
322                 goto done;
323         }
324         count2 = tdb_verify_basic(dst_tdb);
325         if (count2 != count1) {
326                 DEBUG(3, ("Failed to verify result of copying tdb '%s'.\n",
327                           src_path));
328                 tdb_close(dst_tdb);
329                 goto done;
330         }
331
332         DEBUG(10, ("tdb_backup: successfully copied %d entries\n", count1));
333
334         /* make sure the new tdb has reached stable storage
335          * then rename it to its destination */
336         fsync(tdb_fd(dst_tdb));
337         tdb_close(dst_tdb);
338         unlink(dst_path);
339         if (rename(tmp_path, dst_path) != 0) {
340                 DEBUG(3, ("Failed to rename '%s' to '%s': %s\n",
341                           tmp_path, dst_path, strerror(errno)));
342                 goto done;
343         }
344
345         /* success */
346         ret = 0;
347
348 done:
349         if (src_tdb != NULL) {
350                 tdb_close(src_tdb);
351         }
352         if (tmp_path != NULL) {
353                 unlink(tmp_path);
354                 TALLOC_FREE(tmp_path);
355         }
356         if (saved_errno != 0) {
357                 errno = saved_errno;
358         }
359         return ret;
360 }
361
362 static int rename_file_with_suffix(TALLOC_CTX *ctx, const char *path,
363                                    const char *suffix)
364 {
365         int ret = -1;
366         char *dst_path;
367
368         dst_path = talloc_asprintf(ctx, "%s%s", path, suffix);
369         if (dst_path == NULL) {
370                 DEBUG(3, ("error out of memory\n"));
371                 return ret;
372         }
373
374         ret = (rename(path, dst_path) != 0);
375
376         if (ret == 0) {
377                 DEBUG(5, ("moved '%s' to '%s'\n", path, dst_path));
378         } else if (errno == ENOENT) {
379                 DEBUG(3, ("file '%s' does not exist - so not moved\n", path));
380                 ret = 0;
381         } else {
382                 DEBUG(3, ("error renaming %s to %s: %s\n", path, dst_path,
383                           strerror(errno)));
384         }
385
386         TALLOC_FREE(dst_path);
387         return ret;
388 }
389
390 /*
391  * do a backup of a tdb, moving the destination out of the way first
392  */
393 static int tdb_backup_with_rotate(TALLOC_CTX *ctx, const char *src_path,
394                                   const char *dst_path, int hash_size,
395                                   const char *rotate_suffix,
396                                   bool retry_norotate_if_nospc,
397                                   bool rename_as_last_resort_if_nospc)
398 {
399         int ret;
400
401         rename_file_with_suffix(ctx, dst_path, rotate_suffix);
402
403         ret = tdb_backup(ctx, src_path, dst_path, hash_size);
404
405         if (ret != 0) {
406                 DEBUG(10, ("backup of %s failed: %s\n", src_path, strerror(errno)));
407         }
408         if ((ret != 0) && (errno == ENOSPC) && retry_norotate_if_nospc)
409         {
410                 char *rotate_path = talloc_asprintf(ctx, "%s%s", dst_path,
411                                                     rotate_suffix);
412                 if (rotate_path == NULL) {
413                         DEBUG(10, ("talloc fail\n"));
414                         return -1;
415                 }
416                 DEBUG(10, ("backup of %s failed due to lack of space\n",
417                            src_path));
418                 DEBUGADD(10, ("trying to free some space by removing rotated "
419                               "dst %s\n", rotate_path));
420                 if (unlink(rotate_path) == -1) {
421                         DEBUG(10, ("unlink of %s failed: %s\n", rotate_path,
422                                    strerror(errno)));
423                 } else {
424                         ret = tdb_backup(ctx, src_path, dst_path, hash_size);
425                 }
426                 TALLOC_FREE(rotate_path);
427         }
428
429         if ((ret != 0) && (errno == ENOSPC) && rename_as_last_resort_if_nospc)
430         {
431                 DEBUG(10, ("backup of %s failed due to lack of space\n", 
432                            src_path));
433                 DEBUGADD(10, ("using 'rename' as a last resort\n"));
434                 ret = rename(src_path, dst_path);
435         }
436
437         return ret;
438 }
439
440 /*
441  * validation function with backup handling:
442  *
443  *  - calls tdb_validate
444  *  - if the tdb is ok, create a backup "name.bak", possibly moving
445  *    existing backup to name.bak.old,
446  *    return 0 (success) even if the backup fails
447  *  - if the tdb is corrupt:
448  *    - move the tdb to "name.corrupt"
449  *    - check if there is valid backup.
450  *      if so, restore the backup.
451  *      if restore is successful, return 0 (success),
452  *    - otherwise return -1 (failure)
453  */
454 int tdb_validate_and_backup(const char *tdb_path,
455                             tdb_validate_data_func validate_fn)
456 {
457         int ret = -1;
458         const char *backup_suffix = ".bak";
459         const char *corrupt_suffix = ".corrupt";
460         const char *rotate_suffix = ".old";
461         char *tdb_path_backup;
462         struct stat st;
463         TALLOC_CTX *ctx = NULL;
464
465         ctx = talloc_new(NULL);
466         if (ctx == NULL) {
467                 DEBUG(0, ("tdb_validate_and_backup: out of memory\n"));
468                 goto done;
469         }
470
471         tdb_path_backup = talloc_asprintf(ctx, "%s%s", tdb_path, backup_suffix);
472         if (!tdb_path_backup) {
473                 DEBUG(0, ("tdb_validate_and_backup: out of memory\n"));
474                 goto done;
475         }
476
477         ret = tdb_validate_open(tdb_path, validate_fn);
478
479         if (ret == 0) {
480                 DEBUG(1, ("tdb '%s' is valid\n", tdb_path));
481                 ret = tdb_backup_with_rotate(ctx, tdb_path, tdb_path_backup, 0,
482                                              rotate_suffix, True, False);
483                 if (ret != 0) {
484                         DEBUG(1, ("Error creating backup of tdb '%s'\n",
485                                   tdb_path));
486                         /* the actual validation was successful: */
487                         ret = 0;
488                 } else {
489                         DEBUG(1, ("Created backup '%s' of tdb '%s'\n",
490                                   tdb_path_backup, tdb_path));
491                 }
492         } else {
493                 DEBUG(1, ("tdb '%s' is invalid\n", tdb_path));
494
495                 ret =stat(tdb_path_backup, &st);
496                 if (ret != 0) {
497                         DEBUG(5, ("Could not stat '%s': %s\n", tdb_path_backup,
498                                   strerror(errno)));
499                         DEBUG(1, ("No backup found.\n"));
500                 } else {
501                         DEBUG(1, ("backup '%s' found.\n", tdb_path_backup));
502                         ret = tdb_validate_open(tdb_path_backup, validate_fn);
503                         if (ret != 0) {
504                                 DEBUG(1, ("Backup '%s' is invalid.\n",
505                                           tdb_path_backup));
506                         }
507                 }
508
509                 if (ret != 0) {
510                         int renamed = rename_file_with_suffix(ctx, tdb_path,
511                                                               corrupt_suffix);
512                         if (renamed != 0) {
513                                 DEBUG(1, ("Error moving tdb to '%s%s'\n",
514                                           tdb_path, corrupt_suffix));
515                         } else {
516                                 DEBUG(1, ("Corrupt tdb stored as '%s%s'\n",
517                                           tdb_path, corrupt_suffix));
518                         }
519                         goto done;
520                 }
521
522                 DEBUG(1, ("valid backup '%s' found\n", tdb_path_backup));
523                 ret = tdb_backup_with_rotate(ctx, tdb_path_backup, tdb_path, 0,
524                                              corrupt_suffix, True, True);
525                 if (ret != 0) {
526                         DEBUG(1, ("Error restoring backup from '%s'\n",
527                                   tdb_path_backup));
528                 } else {
529                         DEBUG(1, ("Restored tdb backup from '%s'\n",
530                                   tdb_path_backup));
531                 }
532         }
533
534 done:
535         TALLOC_FREE(ctx);
536         return ret;
537 }