tdb: Harden tdb_rec_read
[samba.git] / lib / tdb / common / io.c
index 87d47b970b0a239117c9d20903d8929837c7668a..94b316331c123396581722c47dd2b4faee42313b 100644 (file)
 
 #include "tdb_private.h"
 
+/*
+ * We prepend the mutex area, so fixup offsets. See mutex.c for details.
+ * tdb->hdr_ofs is 0 or header.mutex_size.
+ *
+ * Note that the 4GB limit of tdb_off_t applies only to
+ * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs!
+ */
+
+/*
+ * Turn a tdb-relative offset into an offset within the file by adding
+ * tdb->hdr_ofs to it.
+ *
+ * On success *off is rewritten in place and true is returned. If the
+ * sum compares smaller than either operand, *off is left untouched,
+ * errno is set to EIO and false is returned.
+ */
+static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
+{
+       off_t file_off = tdb->hdr_ofs + *off;
+       bool wrapped = (file_off < tdb->hdr_ofs) || (file_off < *off);
+
+       if (!wrapped) {
+               *off = file_off;
+               return true;
+       }
+
+       errno = EIO;
+       return false;
+}
+
+/*
+ * pwrite() on tdb->fd at a tdb-relative offset.
+ *
+ * The offset is first shifted with tdb_adjust_offset(); if that fails,
+ * -1 is returned without writing. The write is repeated for as long as
+ * it fails with EINTR. Otherwise the result of pwrite() is returned
+ * unchanged, so short writes are left to the caller.
+ */
+static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
+                         size_t count, off_t offset)
+{
+       ssize_t nwritten;
+
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+
+       for (;;) {
+               nwritten = pwrite(tdb->fd, buf, count, offset);
+               if ((nwritten != -1) || (errno != EINTR)) {
+                       break;
+               }
+       }
+
+       return nwritten;
+}
+
+/*
+ * pread() on tdb->fd at a tdb-relative offset.
+ *
+ * The offset is first shifted with tdb_adjust_offset(); if that fails,
+ * -1 is returned without reading. The read is repeated for as long as
+ * it fails with EINTR. Otherwise the result of pread() is returned
+ * unchanged, so short reads are left to the caller.
+ */
+static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
+                        size_t count, off_t offset)
+{
+       ssize_t nread;
+
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+
+       for (;;) {
+               nread = pread(tdb->fd, buf, count, offset);
+               if ((nread != -1) || (errno != EINTR)) {
+                       break;
+               }
+       }
+
+       return nread;
+}
+
+/*
+ * ftruncate() tdb->fd to a tdb-relative length.
+ *
+ * The length is first shifted with tdb_adjust_offset(); if that fails,
+ * -1 is returned and the file is not touched. The call is repeated for
+ * as long as it fails with EINTR. Otherwise the result of ftruncate()
+ * is returned.
+ */
+static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
+{
+       int rc;
+
+       if (!tdb_adjust_offset(tdb, &length)) {
+               return -1;
+       }
+
+       rc = ftruncate(tdb->fd, length);
+       while ((rc == -1) && (errno == EINTR)) {
+               rc = ftruncate(tdb->fd, length);
+       }
+
+       return rc;
+}
+
+#if HAVE_POSIX_FALLOCATE
+/*
+ * posix_fallocate() on tdb->fd for a tdb-relative range.
+ *
+ * The offset is first shifted with tdb_adjust_offset(); if that fails,
+ * -1 is returned (with errno set by tdb_adjust_offset) and nothing is
+ * allocated.
+ *
+ * Otherwise the return value follows posix_fallocate(): 0 on success
+ * or a positive error number (for example ENOSPC) on failure. Unlike
+ * most system calls, posix_fallocate() reports the error through its
+ * return value and does not set errno, so an interrupted call shows
+ * up as a return value of EINTR and is retried here.
+ */
+static int tdb_posix_fallocate(struct tdb_context *tdb, off_t offset,
+                              off_t len)
+{
+       int ret;
+
+       if (!tdb_adjust_offset(tdb, &offset)) {
+               return -1;
+       }
+
+       do {
+               ret = posix_fallocate(tdb->fd, offset, len);
+       } while (ret == EINTR);
+
+       return ret;
+}
+#endif
+
+/*
+ * fstat() tdb->fd and report the size with tdb->hdr_ofs subtracted.
+ *
+ * Returns -1 if fstat() itself fails. If the file is smaller than
+ * tdb->hdr_ofs, errno is set to EIO and -1 is returned. Otherwise
+ * buf->st_size is reduced by tdb->hdr_ofs and the result of fstat()
+ * is returned.
+ */
+static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
+{
+       int rc = fstat(tdb->fd, buf);
+
+       if (rc == -1) {
+               return -1;
+       }
+
+       /* The file must be at least as large as the prepended area. */
+       if (buf->st_size < tdb->hdr_ofs) {
+               errno = EIO;
+               return -1;
+       }
+
+       buf->st_size -= tdb->hdr_ofs;
+       return rc;
+}
+
 /* check for an out of bounds access - if it is out of bounds then
    see if the database has been expanded by someone else and expand
    if necessary
@@ -58,7 +162,7 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
                return -1;
        }
 
-       if (fstat(tdb->fd, &st) == -1) {
+       if (tdb_fstat(tdb, &st) == -1) {
                tdb->ecode = TDB_ERR_IO;
                return -1;
        }
@@ -122,16 +226,18 @@ static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
                tdb->ecode = TDB_ERR_IO;
                return -1;
 #else
-               ssize_t written = pwrite(tdb->fd, buf, len, off);
+               ssize_t written;
+
+               written = tdb_pwrite(tdb, buf, len, off);
+
                if ((written != (ssize_t)len) && (written != -1)) {
                        /* try once more */
                        tdb->ecode = TDB_ERR_IO;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
                                 "%zi of %u bytes at %u, trying once more\n",
                                 written, len, off));
-                       written = pwrite(tdb->fd, (const char *)buf+written,
-                                        len-written,
-                                        off+written);
+                       written = tdb_pwrite(tdb, (const char *)buf+written,
+                                            len-written, off+written);
                }
                if (written == -1) {
                        /* Ensure ecode is set for log fn. */
@@ -176,7 +282,9 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
                tdb->ecode = TDB_ERR_IO;
                return -1;
 #else
-               ssize_t ret = pread(tdb->fd, buf, len, off);
+               ssize_t ret;
+
+               ret = tdb_pread(tdb, buf, len, off);
                if (ret != (ssize_t)len) {
                        /* Ensure ecode is set for log fn. */
                        tdb->ecode = TDB_ERR_IO;
@@ -258,7 +366,8 @@ int tdb_mmap(struct tdb_context *tdb)
        if (should_mmap(tdb)) {
                tdb->map_ptr = mmap(NULL, tdb->map_size,
                                    PROT_READ|(tdb->read_only? 0:PROT_WRITE),
-                                   MAP_SHARED|MAP_FILE, tdb->fd, 0);
+                                   MAP_SHARED|MAP_FILE, tdb->fd,
+                                   tdb->hdr_ofs);
 
                /*
                 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
@@ -288,6 +397,7 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
 {
        char buf[8192];
        tdb_off_t new_size;
+       int ret;
 
        if (tdb->read_only || tdb->traverse_read) {
                tdb->ecode = TDB_ERR_RDONLY;
@@ -303,12 +413,41 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
                return -1;
        }
 
-       if (ftruncate(tdb->fd, new_size) == -1) {
+#if HAVE_POSIX_FALLOCATE
+       ret = tdb_posix_fallocate(tdb, size, addition);
+       if (ret == 0) {
+               return 0;
+       }
+       if (ret == ENOSPC) {
+               /*
+                * The Linux glibc (at least as of 2.24) fallback if
+                * the file system does not support fallocate does not
+                * reset the file size back to where it was. Also, to
+                * me it is unclear from the posix spec of
+                * posix_fallocate whether this is allowed or
+                * not. Better be safe than sorry and "goto fail" rather
+                * than "return -1" here, which would leave the EOF
+                * pointer too large.
+                */
+               goto fail;
+       }
+
+       /*
+        * Retry the "old" way. Possibly unnecessary, but looking at
+        * our configure script there seem to be weird failure modes
+        * for posix_fallocate. See commit 3264a98ff16de, which
+        * probably refers to
+        * https://sourceware.org/bugzilla/show_bug.cgi?id=1083.
+        */
+#endif
+
+       ret = tdb_ftruncate(tdb, new_size);
+       if (ret == -1) {
                char b = 0;
-               ssize_t written = pwrite(tdb->fd,  &b, 1, new_size - 1);
+               ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
                if (written == 0) {
                        /* try once more, potentially revealing errno */
-                       written = pwrite(tdb->fd,  &b, 1, new_size - 1);
+                       written = tdb_pwrite(tdb, &b, 1, new_size - 1);
                }
                if (written == 0) {
                        /* again - give up, guessing errno */
@@ -328,10 +467,10 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
        memset(buf, TDB_PAD_BYTE, sizeof(buf));
        while (addition) {
                size_t n = addition>sizeof(buf)?sizeof(buf):addition;
-               ssize_t written = pwrite(tdb->fd, buf, n, size);
+               ssize_t written = tdb_pwrite(tdb, buf, n, size);
                if (written == 0) {
                        /* prevent infinite loops: try _once_ more */
-                       written = pwrite(tdb->fd, buf, n, size);
+                       written = tdb_pwrite(tdb, buf, n, size);
                }
                if (written == 0) {
                        /* give up, trying to provide a useful errno */
@@ -339,14 +478,14 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
                                "returned 0 twice: giving up!\n"));
                        errno = ENOSPC;
-                       return -1;
+                       goto fail;
                }
                if (written == -1) {
                        tdb->ecode = TDB_ERR_OOM;
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
                                 "%u bytes failed (%s)\n", (int)n,
                                 strerror(errno)));
-                       return -1;
+                       goto fail;
                }
                if (written != n) {
                        TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
@@ -357,6 +496,28 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
                size += written;
        }
        return 0;
+
+fail:
+       {
+               int err = errno;
+
+               /*
+                * We're holding the freelist lock or are inside a
+                * transaction. Cutting the file is safe, the space we
+                * tried to allocate can't have been used anywhere in
+                * the meantime.
+                */
+
+               ret = tdb_ftruncate(tdb, size);
+               if (ret == -1) {
+                       TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: "
+                                "retruncate to %ju failed\n",
+                                (uintmax_t)size));
+               }
+               errno = err;
+       }
+
+       return -1;
 }
 
 
@@ -405,6 +566,12 @@ tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size)
 
        /* Round the database up to a multiple of the page size */
        new_size = MAX(top_size, new_size);
+
+       if (new_size + page_size < new_size) {
+               /* There's a "+" in TDB_ALIGN that might overflow... */
+               goto overflow;
+       }
+
        return TDB_ALIGN(new_size, page_size) - map_size;
 
 overflow:
@@ -431,6 +598,14 @@ int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
        /* must know about any previous expansions by another process */
        tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
 
+       /*
+        * Note that we don't care about tdb->hdr_ofs != 0 here.
+        *
+        * The 4GB limitation is just related to tdb->map_size
+        * and the offset calculation in the records.
+        *
+        * The file on disk can be up to 4GB + tdb->hdr_ofs
+        */
        size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
 
        if (!tdb_add_off_t(tdb->map_size, size, &new_size)) {
@@ -558,6 +733,9 @@ int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
 /* read/write a record */
 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
 {
+       int ret;
+       tdb_len_t overall_len;
+
        if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
                return -1;
        if (TDB_BAD_MAGIC(rec)) {
@@ -566,6 +744,31 @@ int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *r
                TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset));
                return -1;
        }
+
+       overall_len = rec->key_len + rec->data_len;
+       if (overall_len < rec->data_len) {
+               /* overflow */
+               return -1;
+       }
+
+       if (overall_len > rec->rec_len) {
+               /* invalid record */
+               return -1;
+       }
+
+       ret = tdb->methods->tdb_oob(tdb, offset, rec->key_len, 1);
+       if (ret == -1) {
+               return -1;
+       }
+       ret = tdb->methods->tdb_oob(tdb, offset, rec->data_len, 1);
+       if (ret == -1) {
+               return -1;
+       }
+       ret = tdb->methods->tdb_oob(tdb, offset, rec->rec_len, 1);
+       if (ret == -1) {
+               return -1;
+       }
+
        return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0);
 }