s4:dsdb: Fix stack use after scope in gkdi_create_root_key()
[samba.git] / lib / tdb / common / io.c
index ac21e3f67a1b00c30f0cb354e72a46ca52a015ac..0de0dabd82760d6ca29bfedae3595f4e5974bf92 100644 (file)
@@ -1,4 +1,4 @@
- /* 
+ /*
    Unix SMB/CIFS implementation.
 
    trivial database library
 
 #include "tdb_private.h"
 
+/*
+ * We prepend the mutex area, so fixup offsets. See mutex.c for details.
+ * tdb->hdr_ofs is 0 or header.mutex_size.
+ *
+ * Note that the 4GB limit of tdb_off_t applies only to
+ * tdb->map_size. The file size on disk can be up to 4GB + tdb->hdr_ofs!
+ */
+
+/*
+ * Add tdb->hdr_ofs to *off, in place.
+ *
+ * On success *off holds the sum and true is returned. If the sum
+ * compares smaller than either operand, errno is set to EIO, *off is
+ * left untouched and false is returned.
+ */
+static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
+{
+       off_t shifted = tdb->hdr_ofs + *off;
+       bool wrapped = (shifted < tdb->hdr_ofs) || (shifted < *off);
+
+       if (!wrapped) {
+               *off = shifted;
+               return true;
+       }
+
+       errno = EIO;
+       return false;
+}
+
+/*
+ * pwrite() on tdb->fd, with the offset passed through
+ * tdb_adjust_offset() first. The call is repeated for as long as it
+ * fails with EINTR.
+ *
+ * Returns the result of the last pwrite() call, or -1 if the offset
+ * could not be adjusted.
+ */
+static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
+                         size_t count, off_t offset)
+{
+       ssize_t nwritten;
+
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+
+       for (;;) {
+               nwritten = pwrite(tdb->fd, buf, count, offset);
+               if ((nwritten != -1) || (errno != EINTR)) {
+                       break;
+               }
+       }
+
+       return nwritten;
+}
+
+/*
+ * pread() on tdb->fd, with the offset passed through
+ * tdb_adjust_offset() first. The call is repeated for as long as it
+ * fails with EINTR.
+ *
+ * Returns the result of the last pread() call, or -1 if the offset
+ * could not be adjusted.
+ */
+static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
+                        size_t count, off_t offset)
+{
+       ssize_t nread;
+
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+
+       for (;;) {
+               nread = pread(tdb->fd, buf, count, offset);
+               if ((nread != -1) || (errno != EINTR)) {
+                       break;
+               }
+       }
+
+       return nread;
+}
+
+/*
+ * ftruncate() on tdb->fd, with the requested length passed through
+ * tdb_adjust_offset() first. The call is repeated for as long as it
+ * fails with EINTR.
+ *
+ * Returns the result of the last ftruncate() call, or -1 if the length
+ * could not be adjusted.
+ */
+static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
+{
+       int rc;
+
+       if (!tdb_adjust_offset(tdb, &length)) {
+               return -1;
+       }
+
+       for (;;) {
+               rc = ftruncate(tdb->fd, length);
+               if ((rc != -1) || (errno != EINTR)) {
+                       break;
+               }
+       }
+
+       return rc;
+}
+
+#ifdef HAVE_POSIX_FALLOCATE
+/*
+ * posix_fallocate() on tdb->fd, with the offset passed through
+ * tdb_adjust_offset() first.
+ *
+ * posix_fallocate() reports failure by returning the error number
+ * itself: it does not return -1 and it does not set errno. The retry
+ * on interruption therefore has to test the return value for EINTR.
+ *
+ * Returns 0 on success, the (positive) error number returned by
+ * posix_fallocate() on failure, or -1 if the offset could not be
+ * adjusted.
+ */
+static int tdb_posix_fallocate(struct tdb_context *tdb, off_t offset,
+                              off_t len)
+{
+       int ret;
+
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+
+       do {
+               ret = posix_fallocate(tdb->fd, offset, len);
+       } while (ret == EINTR);
+
+       return ret;
+}
+#endif
+
+/*
+ * fstat() on tdb->fd, reporting st_size with tdb->hdr_ofs subtracted.
+ *
+ * Returns the fstat() result on success. Returns -1 if fstat() fails,
+ * or, with errno set to EIO, if the file is smaller than tdb->hdr_ofs.
+ */
+static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
+{
+       int rc = fstat(tdb->fd, buf);
+
+       if (rc == -1) {
+               return -1;
+       }
+
+       if (buf->st_size >= tdb->hdr_ofs) {
+               buf->st_size -= tdb->hdr_ofs;
+               return rc;
+       }
+
+       errno = EIO;
+       return -1;
+}
+
 /* check for an out of bounds access - if it is out of bounds then
    see if the database has been expanded by someone else and expand
-   if necessary 
+   if necessary
 */
-static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
-                  int probe)
+static int tdb_notrans_oob(
+       struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, int probe)
 {
        struct stat st;
        if (len + off < len) {
                if (!probe) {
                        /* Ensure ecode is set for log fn. */
                        tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob off %d len %d wrap\n",
-                                (int)off, (int)len));
+                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob off %u len %u wrap\n",
+                                off, len));
                }
                return -1;
        }
 
+       /*
+        * This duplicates functionality from tdb_oob(). Don't remove:
+        * we still have direct callers of tdb->methods->tdb_oob()
+        * inside transaction.c.
+        */
        if (off + len <= tdb->map_size)
                return 0;
        if (tdb->flags & TDB_INTERNAL) {
@@ -58,21 +167,11 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
                return -1;
        }
 
-       if (fstat(tdb->fd, &st) == -1) {
+       if (tdb_fstat(tdb, &st) == -1) {
                tdb->ecode = TDB_ERR_IO;
                return -1;
        }
 
-       if (st.st_size < (size_t)off + len) {
-               if (!probe) {
-                       /* Ensure ecode is set for log fn. */
-                       tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n",
-                                (int)(off + len), (int)st.st_size));
-               }
-               return -1;
-       }
-
        /* Beware >4G files! */
        if ((tdb_off_t)st.st_size != st.st_size) {
                /* Ensure ecode is set for log fn. */
@@ -82,18 +181,35 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
                return -1;
        }
 
-       /* Unmap, update size, remap */
+       /* Unmap, update size, remap.  We do this unconditionally, to handle
+        * the unusual case where the db is truncated.
+        *
+        * This can happen to a child using tdb_reopen_all(true) on a
+        * TDB_CLEAR_IF_FIRST tdb whose parent crashes: the next
+        * opener will truncate the database. */
        if (tdb_munmap(tdb) == -1) {
                tdb->ecode = TDB_ERR_IO;
                return -1;
        }
        tdb->map_size = st.st_size;
-       tdb_mmap(tdb);
+       if (tdb_mmap(tdb) != 0) {
+               return -1;
+       }
+
+       if (st.st_size < (size_t)off + len) {
+               if (!probe) {
+                       /* Ensure ecode is set for log fn. */
+                       tdb->ecode = TDB_ERR_IO;
+                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n",
+                                (int)(off + len), (int)st.st_size));
+               }
+               return -1;
+       }
        return 0;
 }
 
 /* write a lump of data at a specified offset */
-static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
+static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
                     const void *buf, tdb_len_t len)
 {
        if (len == 0) {
@@ -105,36 +221,43 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
                return -1;
        }
 
-       if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0)
+       if (tdb_oob(tdb, off, len, 0) != 0)
                return -1;
 
        if (tdb->map_ptr) {
                memcpy(off + (char *)tdb->map_ptr, buf, len);
        } else {
-               ssize_t written = pwrite(tdb->fd, buf, len, off);
+#ifdef HAVE_INCOHERENT_MMAP
+               tdb->ecode = TDB_ERR_IO;
+               return -1;
+#else
+               ssize_t written;
+
+               written = tdb_pwrite(tdb, buf, len, off);
+
                if ((written != (ssize_t)len) && (written != -1)) {
                        /* try once more */
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
-                                "%d of %d bytes at %d, trying once more\n",
-                                (int)written, len, off));
-                       written = pwrite(tdb->fd, (const char *)buf+written,
-                                        len-written,
-                                        off+written);
+                                "%zi of %u bytes at %u, trying once more\n",
+                                written, len, off));
+                       written = tdb_pwrite(tdb, (const char *)buf+written,
+                                            len-written, off+written);
                }
                if (written == -1) {
                        /* Ensure ecode is set for log fn. */
                        tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
-                                "len=%d (%s)\n", off, len, strerror(errno)));
+                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %u "
+                                "len=%u (%s)\n", off, len, strerror(errno)));
                        return -1;
                } else if (written != (ssize_t)len) {
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
-                                "write %d bytes at %d in two attempts\n",
+                                "write %u bytes at %u in two attempts\n",
                                 len, off));
                        return -1;
                }
+#endif
        }
        return 0;
 }
@@ -150,26 +273,33 @@ void *tdb_convert(void *buf, uint32_t size)
 
 
 /* read a lump of data at a specified offset, maybe convert */
-static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 
+static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
                    tdb_len_t len, int cv)
 {
-       if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) {
+       if (tdb_oob(tdb, off, len, 0) != 0) {
                return -1;
        }
 
        if (tdb->map_ptr) {
                memcpy(buf, off + (char *)tdb->map_ptr, len);
        } else {
-               ssize_t ret = pread(tdb->fd, buf, len, off);
+#ifdef HAVE_INCOHERENT_MMAP
+               tdb->ecode = TDB_ERR_IO;
+               return -1;
+#else
+               ssize_t ret;
+
+               ret = tdb_pread(tdb, buf, len, off);
                if (ret != (ssize_t)len) {
                        /* Ensure ecode is set for log fn. */
                        tdb->ecode = TDB_ERR_IO;
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
-                                "len=%d ret=%d (%s) map_size=%d\n",
-                                (int)off, (int)len, (int)ret, strerror(errno),
-                                (int)tdb->map_size));
+                       TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %u "
+                                "len=%u ret=%zi (%s) map_size=%u\n",
+                                off, len, ret, strerror(errno),
+                                tdb->map_size));
                        return -1;
                }
+#endif
        }
        if (cv) {
                tdb_convert(buf, len);
@@ -182,19 +312,19 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
 /*
   do an unlocked scan of the hash table heads to find the next non-zero head. The value
   will then be confirmed with the lock held
-*/             
+*/
 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
 {
        uint32_t h = *chain;
        if (tdb->map_ptr) {
-               for (;h < tdb->header.hash_size;h++) {
+               for (;h < tdb->hash_size;h++) {
                        if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
                                break;
                        }
                }
        } else {
                uint32_t off=0;
-               for (;h < tdb->header.hash_size;h++) {
+               for (;h < tdb->hash_size;h++) {
                        if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
                                break;
                        }
@@ -222,16 +352,27 @@ int tdb_munmap(struct tdb_context *tdb)
        return 0;
 }
 
-void tdb_mmap(struct tdb_context *tdb)
+/*
+ * Decide whether the database file should be mapped.
+ *
+ * If mmap isn't coherent, *everyone* must always mmap, so with
+ * HAVE_INCOHERENT_MMAP the answer is always true. Otherwise the answer
+ * is true unless TDB_NOMMAP is set in tdb->flags.
+ */
+static bool should_mmap(const struct tdb_context *tdb)
+{
+#ifndef HAVE_INCOHERENT_MMAP
+       return (tdb->flags & TDB_NOMMAP) == 0;
+#else
+       return true;
+#endif
+}
+
+int tdb_mmap(struct tdb_context *tdb)
 {
        if (tdb->flags & TDB_INTERNAL)
-               return;
+               return 0;
 
 #ifdef HAVE_MMAP
-       if (!(tdb->flags & TDB_NOMMAP)) {
-               tdb->map_ptr = mmap(NULL, tdb->map_size, 
-                                   PROT_READ|(tdb->read_only? 0:PROT_WRITE), 
-                                   MAP_SHARED|MAP_FILE, tdb->fd, 0);
+       if (should_mmap(tdb)) {
+               tdb->map_ptr = mmap(NULL, tdb->map_size,
+                                   PROT_READ|(tdb->read_only? 0:PROT_WRITE),
+                                   MAP_SHARED|MAP_FILE, tdb->fd,
+                                   tdb->hdr_ofs);
 
                /*
                 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
@@ -239,8 +380,12 @@ void tdb_mmap(struct tdb_context *tdb)
 
                if (tdb->map_ptr == MAP_FAILED) {
                        tdb->map_ptr = NULL;
-                       TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n", 
+                       TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %u (%s)\n",
                                 tdb->map_size, strerror(errno)));
+#ifdef HAVE_INCOHERENT_MMAP
+                       tdb->ecode = TDB_ERR_IO;
+                       return -1;
+#endif
                }
        } else {
                tdb->map_ptr = NULL;
@@ -248,6 +393,7 @@ void tdb_mmap(struct tdb_context *tdb)
 #else
        tdb->map_ptr = NULL;
 #endif
+       return 0;
 }
 
 /* expand a file.  we prefer to use ftruncate, as that is what posix
@@ -255,26 +401,67 @@ void tdb_mmap(struct tdb_context *tdb)
 static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
 {
        char buf[8192];
+       tdb_off_t new_size;
+       int ret;
 
        if (tdb->read_only || tdb->traverse_read) {
                tdb->ecode = TDB_ERR_RDONLY;
                return -1;
        }
 
-       if (ftruncate(tdb->fd, size+addition) == -1) {
+       if (!tdb_add_off_t(size, addition, &new_size)) {
+               tdb->ecode = TDB_ERR_OOM;
+               TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
+                       "overflow detected current size[%u] addition[%u]!\n",
+                       (unsigned)size, (unsigned)addition));
+               errno = ENOSPC;
+               return -1;
+       }
+
+#ifdef HAVE_POSIX_FALLOCATE
+       ret = tdb_posix_fallocate(tdb, size, addition);
+       if (ret == 0) {
+               return 0;
+       }
+       if (ret == ENOSPC) {
+               /*
+                * The Linux glibc (at least as of 2.24) fallback if
+                * the file system does not support fallocate does not
+                * reset the file size back to where it was. Also, to
+                * me it is unclear from the posix spec of
+                * posix_fallocate whether this is allowed or
+                * not. Better be safe than sorry and "goto fail" but
+                * "return -1" here, leaving the EOF pointer too
+                * large.
+                */
+               goto fail;
+       }
+
+       /*
+        * Retry the "old" way. Possibly unnecessary, but looking at
+        * our configure script there seem to be weird failure modes
+        * for posix_fallocate. See commit 3264a98ff16de, which
+        * probably refers to
+        * https://sourceware.org/bugzilla/show_bug.cgi?id=1083.
+        */
+#endif
+
+       ret = tdb_ftruncate(tdb, new_size);
+       if (ret == -1) {
                char b = 0;
-               ssize_t written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
+               ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
                if (written == 0) {
                        /* try once more, potentially revealing errno */
-                       written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
+                       written = tdb_pwrite(tdb, &b, 1, new_size - 1);
                }
                if (written == 0) {
                        /* again - give up, guessing errno */
                        errno = ENOSPC;
                }
                if (written != 1) {
-                       TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", 
-                                size+addition, strerror(errno)));
+                       tdb->ecode = TDB_ERR_OOM;
+                       TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %u failed (%s)\n",
+                                (unsigned)new_size, strerror(errno)));
                        return -1;
                }
        }
@@ -285,45 +472,89 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
        memset(buf, TDB_PAD_BYTE, sizeof(buf));
        while (addition) {
                size_t n = addition>sizeof(buf)?sizeof(buf):addition;
-               ssize_t written = pwrite(tdb->fd, buf, n, size);
+               ssize_t written = tdb_pwrite(tdb, buf, n, size);
                if (written == 0) {
                        /* prevent infinite loops: try _once_ more */
-                       written = pwrite(tdb->fd, buf, n, size);
+                       written = tdb_pwrite(tdb, buf, n, size);
                }
                if (written == 0) {
                        /* give up, trying to provide a useful errno */
+                       tdb->ecode = TDB_ERR_OOM;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
                                "returned 0 twice: giving up!\n"));
                        errno = ENOSPC;
-                       return -1;
-               } else if (written == -1) {
+                       goto fail;
+               }
+               if (written == -1) {
+                       tdb->ecode = TDB_ERR_OOM;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
-                                "%d bytes failed (%s)\n", (int)n,
+                                "%u bytes failed (%s)\n", (int)n,
                                 strerror(errno)));
-                       return -1;
-               } else if (written != n) {
+                       goto fail;
+               }
+               if (written != n) {
                        TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
-                                "only %d of %d bytes - retrying\n", (int)written,
-                                (int)n));
+                                "only %zu of %zi bytes - retrying\n", written,
+                                n));
                }
                addition -= written;
                size += written;
        }
        return 0;
+
+fail:
+       {
+               int err = errno;
+
+               /*
+                * We're holding the freelist lock or are inside a
+                * transaction. Cutting the file is safe, the space we
+                * tried to allocate can't have been used anywhere in
+                * the meantime.
+                */
+
+               ret = tdb_ftruncate(tdb, size);
+               if (ret == -1) {
+                       TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: "
+                                "retruncate to %ju failed\n",
+                                (uintmax_t)size));
+               }
+               errno = err;
+       }
+
+       return -1;
 }
 
 
 /* You need 'size', this tells you how much you should expand by. */
 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size)
 {
-       tdb_off_t new_size, top_size;
+       tdb_off_t new_size, top_size, increment;
+       tdb_off_t max_size = UINT32_MAX - map_size;
+
+       if (size > max_size) {
+               /*
+                * We can't round up anymore, just give back
+                * what we're asked for.
+                *
+                * The caller has to take care of the ENOSPC handling.
+                */
+               return size;
+       }
 
        /* limit size in order to avoid using up huge amounts of memory for
         * in memory tdbs if an oddball huge record creeps in */
        if (size > 100 * 1024) {
-               top_size = map_size + size * 2;
+               increment = size * 2;
        } else {
-               top_size = map_size + size * 100;
+               increment = size * 100;
+       }
+       if (increment < size) {
+               goto overflow;
+       }
+
+       if (!tdb_add_off_t(map_size, increment, &top_size)) {
+               goto overflow;
        }
 
        /* always make room for at least top_size more records, and at
@@ -334,10 +565,26 @@ tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size)
        } else {
                new_size = map_size * 1.25;
        }
+       if (new_size < map_size) {
+               goto overflow;
+       }
 
        /* Round the database up to a multiple of the page size */
        new_size = MAX(top_size, new_size);
+
+       if (new_size + page_size < new_size) {
+               /* There's a "+" in TDB_ALIGN that might overflow... */
+               goto overflow;
+       }
+
        return TDB_ALIGN(new_size, page_size) - map_size;
+
+overflow:
+       /*
+        * Somewhere in between we went over 4GB. Make one big jump to
+        * exactly 4GB database size.
+        */
+       return max_size;
 }
 
 /* expand the database at least size bytes by expanding the underlying
@@ -346,6 +593,7 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
 {
        struct tdb_record rec;
        tdb_off_t offset;
+       tdb_off_t new_size;
 
        if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
                TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
@@ -353,52 +601,62 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
        }
 
        /* must know about any previous expansions by another process */
-       tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
-
-       size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
-
-       if (!(tdb->flags & TDB_INTERNAL))
-               tdb_munmap(tdb);
+       tdb_oob(tdb, tdb->map_size, 1, 1);
 
        /*
-        * We must ensure the file is unmapped before doing this
-        * to ensure consistency with systems like OpenBSD where
-        * writes and mmaps are not consistent.
+        * Note: that we don't care about tdb->hdr_ofs != 0 here
+        *
+        * The 4GB limitation is just related to tdb->map_size
+        * and the offset calculation in the records.
+        *
+        * The file on disk can be up to 4GB + tdb->hdr_ofs
         */
+       size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
 
-       /* expand the file itself */
-       if (!(tdb->flags & TDB_INTERNAL)) {
-               if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
-                       goto fail;
+       if (!tdb_add_off_t(tdb->map_size, size, &new_size)) {
+               tdb->ecode = TDB_ERR_OOM;
+               TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_expand "
+                       "overflow detected current map_size[%u] size[%u]!\n",
+                       (unsigned)tdb->map_size, (unsigned)size));
+               goto fail;
        }
 
-       tdb->map_size += size;
+       /* form a new freelist record */
+       offset = tdb->map_size;
+       memset(&rec,'\0',sizeof(rec));
+       rec.rec_len = size - sizeof(rec);
 
        if (tdb->flags & TDB_INTERNAL) {
-               char *new_map_ptr = (char *)realloc(tdb->map_ptr,
-                                                   tdb->map_size);
+               char *new_map_ptr;
+
+               new_map_ptr = (char *)realloc(tdb->map_ptr, new_size);
                if (!new_map_ptr) {
-                       tdb->map_size -= size;
+                       tdb->ecode = TDB_ERR_OOM;
                        goto fail;
                }
                tdb->map_ptr = new_map_ptr;
+               tdb->map_size = new_size;
        } else {
+               int ret;
+
                /*
-                * We must ensure the file is remapped before adding the space
-                * to ensure consistency with systems like OpenBSD where
-                * writes and mmaps are not consistent.
+                * expand the file itself
                 */
+               ret = tdb->methods->tdb_expand_file(tdb, tdb->map_size, size);
+               if (ret != 0) {
+                       goto fail;
+               }
 
-               /* We're ok if the mmap fails as we'll fallback to read/write */
-               tdb_mmap(tdb);
+               /* Explicitly remap: if we're in a transaction, this won't
+                * happen automatically! */
+               tdb_munmap(tdb);
+               tdb->map_size = new_size;
+               if (tdb_mmap(tdb) != 0) {
+                       goto fail;
+               }
        }
 
-       /* form a new freelist record */
-       memset(&rec,'\0',sizeof(rec));
-       rec.rec_len = size - sizeof(rec);
-
        /* link it into the free list */
-       offset = tdb->map_size - size;
        if (tdb_free(tdb, offset, &rec) == -1)
                goto fail;
 
@@ -409,6 +667,12 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
        return -1;
 }
 
+/*
+ * Forward a bounds check to the tdb_oob entry of tdb->methods and hand
+ * its result back unchanged.
+ */
+int _tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, int probe)
+{
+       return tdb->methods->tdb_oob(tdb, off, len, probe);
+}
+
 /* read/write a tdb_off_t */
 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
 {
@@ -432,7 +696,7 @@ unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len
        if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
                /* Ensure ecode is set for log fn. */
                tdb->ecode = TDB_ERR_OOM;
-               TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
+               TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%u (%s)\n",
                           len, strerror(errno)));
                return NULL;
        }
@@ -461,7 +725,7 @@ int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
                 * Optimize by avoiding the malloc/memcpy/free, point the
                 * parser directly at the mmap area.
                 */
-               if (tdb->methods->tdb_oob(tdb, offset, len, 0) != 0) {
+               if (tdb_oob(tdb, offset, len, 0) != 0) {
                        return -1;
                }
                data.dptr = offset + (unsigned char *)tdb->map_ptr;
@@ -480,15 +744,43 @@ int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
 /* read/write a record */
+/*
+ * Read the record header at 'offset' into *rec and sanity-check it.
+ *
+ * The header is fetched through tdb->methods->tdb_read(). It is then
+ * rejected if its magic is bad, if key_len + data_len wraps, or if that
+ * sum exceeds rec_len. After that the key, data and record lengths are
+ * probed with tdb_oob(), and finally the header that rec->next points
+ * at is bounds-checked.
+ *
+ * Returns -1 on any failed check, otherwise the result of the final
+ * tdb_oob() call.
+ */
 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
 {
+       int ret;
+       tdb_len_t overall_len;
+
        if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
                return -1;
        if (TDB_BAD_MAGIC(rec)) {
                /* Ensure ecode is set for log fn. */
                tdb->ecode = TDB_ERR_CORRUPT;
-               TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
+               TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset));
+               return -1;
+       }
+
+       /*
+        * NOTE(review): the two early returns below do not set tdb->ecode
+        * or log anything, unlike the bad-magic branch above - confirm
+        * that this is intended.
+        */
+       overall_len = rec->key_len + rec->data_len;
+       if (overall_len < rec->data_len) {
+               /* overflow */
                return -1;
        }
-       return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0);
+
+       if (overall_len > rec->rec_len) {
+               /* invalid record */
+               return -1;
+       }
+
+       /*
+        * Probe (last argument 1) each length in turn.
+        *
+        * NOTE(review): all three ranges start at 'offset', i.e. at the
+        * record header, not at offset + sizeof(*rec) - confirm whether
+        * the header size was meant to be included.
+        */
+       ret = tdb_oob(tdb, offset, rec->key_len, 1);
+       if (ret == -1) {
+               return -1;
+       }
+       ret = tdb_oob(tdb, offset, rec->data_len, 1);
+       if (ret == -1) {
+               return -1;
+       }
+       ret = tdb_oob(tdb, offset, rec->rec_len, 1);
+       if (ret == -1) {
+               return -1;
+       }
+
+       /* Non-probing check that a header fits at rec->next. */
+       return tdb_oob(tdb, rec->next, sizeof(*rec), 0);
 }
 
 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
@@ -501,7 +793,7 @@ static const struct tdb_methods io_methods = {
        tdb_read,
        tdb_write,
        tdb_next_hash_chain,
-       tdb_oob,
+       tdb_notrans_oob,
        tdb_expand_file,
 };