s3: piddir creation fix part 2.
[ira/wip.git] / lib / tdb / common / check.c
index 2c640434ee42360dd573f6706b134b55b26d340f..313f55cbb05ed39632ddc99ff60a189f266be9ec 100644 (file)
@@ -28,8 +28,9 @@
 static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
 {
        struct tdb_header hdr;
+       uint32_t h1, h2;
 
-       if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), DOCONV()) == -1)
+       if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
                return false;
        if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
                goto corrupt;
@@ -38,7 +39,12 @@ static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
        if (hdr.version != TDB_VERSION)
                goto corrupt;
 
-       if (hdr.rwlocks != 0)
+       if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
+               goto corrupt;
+
+       tdb_header_hash(tdb, &h1, &h2);
+       if (hdr.magic1_hash && hdr.magic2_hash &&
+           (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
                goto corrupt;
 
        if (hdr.hash_size == 0)
@@ -86,7 +92,7 @@ static bool tdb_check_record(struct tdb_context *tdb,
                         off, rec->next));
                goto corrupt;
        }
-       if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0))
+       if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0))
                goto corrupt;
 
        /* Check rec_len: similar to rec->next, implies next record. */
@@ -104,7 +110,7 @@ static bool tdb_check_record(struct tdb_context *tdb,
                goto corrupt;
        }
        /* OOB allows "right at the end" access, so this works for last rec. */
-       if (tdb->methods->tdb_oob(tdb, off+sizeof(*rec)+rec->rec_len, 0))
+       if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
                goto corrupt;
 
        /* Check tailer. */
@@ -301,7 +307,22 @@ static bool tdb_check_free_record(struct tdb_context *tdb,
        return true;
 }
 
-int tdb_check(struct tdb_context *tdb,
+/* Count the 0 / 0x42 fill bytes starting at off (0 if a read fails). Slow, but should be very rare. */
+size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off)
+{
+       size_t len;
+       /* Probe one byte per step; stop at map_size or at the first byte that is not fill. */
+       for (len = 0; off + len < tdb->map_size; len++) {
+               char c;
+               if (tdb->methods->tdb_read(tdb, off + len, &c, 1, 0))
+                       return 0;
+               if (c != 0 && c != 0x42)
+                       break;
+       }
+       return len;
+}
+
+_PUBLIC_ int tdb_check(struct tdb_context *tdb,
              int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
              void *private_data)
 {
@@ -310,12 +331,21 @@ int tdb_check(struct tdb_context *tdb,
        tdb_off_t off, recovery_start;
        struct tdb_record rec;
        bool found_recovery = false;
-
-       if (tdb_lockall_read(tdb) == -1)
-               return -1;
+       tdb_len_t dead;
+       bool locked;
+
+       /* Read-only databases use no locking at all: it's best-effort.
+        * We may have a write lock already, so skip that case too. */
+       if (tdb->read_only || tdb->allrecord_lock.count != 0) {
+               locked = false;
+       } else {
+               if (tdb_lockall_read(tdb) == -1)
+                       return -1;
+               locked = true;
+       }
 
        /* Make sure we know true size of the underlying file. */
-       tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
+       tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
 
        /* Header must be OK: also gets us the recovery ptr, if any. */
        if (!tdb_check_header(tdb, &recovery_start))
@@ -369,8 +399,23 @@ int tdb_check(struct tdb_context *tdb,
                        if (!tdb_check_free_record(tdb, off, &rec, hashes))
                                goto free;
                        break;
-               case TDB_RECOVERY_MAGIC:
+               /* If we crash after ftruncate, we can get zeroes or fill. */
                case TDB_RECOVERY_INVALID_MAGIC:
+               case 0x42424242:
+                       if (recovery_start == off) {
+                               found_recovery = true;
+                               break;
+                       }
+                       dead = tdb_dead_space(tdb, off);
+                       if (dead < sizeof(rec))
+                               goto corrupt;
+
+                       TDB_LOG((tdb, TDB_DEBUG_ERROR,
+                                "Dead space at %d-%d (of %u)\n",
+                                off, off + dead, tdb->map_size));
+                       rec.rec_len = dead - sizeof(rec);
+                       break;
+               case TDB_RECOVERY_MAGIC:
                        if (recovery_start != off) {
                                TDB_LOG((tdb, TDB_DEBUG_ERROR,
                                         "Unexpected recovery record at offset %d\n",
@@ -379,7 +424,8 @@ int tdb_check(struct tdb_context *tdb,
                        }
                        found_recovery = true;
                        break;
-               default:
+               default: ;
+               corrupt:
                        tdb->ecode = TDB_ERR_CORRUPT;
                        TDB_LOG((tdb, TDB_DEBUG_ERROR,
                                 "Bad magic 0x%x at offset %d\n",
@@ -405,19 +451,22 @@ int tdb_check(struct tdb_context *tdb,
        /* We must have found recovery area if there was one. */
        if (recovery_start != 0 && !found_recovery) {
                TDB_LOG((tdb, TDB_DEBUG_ERROR,
-                        "Expected %s recovery area, got %s\n",
-                        recovery_start ? "a" : "no",
-                        found_recovery ? "one" : "none"));
+                        "Expected a recovery area at %u\n",
+                        recovery_start));
                goto free;
        }
 
        free(hashes);
-       tdb_unlockall_read(tdb);
+       if (locked) {
+               tdb_unlockall_read(tdb);
+       }
        return 0;
 
 free:
        free(hashes);
 unlock:
-       tdb_unlockall_read(tdb);
+       if (locked) {
+               tdb_unlockall_read(tdb);
+       }
        return -1;
 }