2 Unix SMB/CIFS implementation.
4 trivial database library
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #include "tdb_private.h"
/*
 * File-local pread() substitute: positions fd at `offset` with lseek() and
 * then read()s up to `count` bytes into `buf`, returning read()'s result.
 * Because it seeks, it moves the descriptor's file offset (POSIX pread()
 * leaves the offset alone).
 * NOTE(review): the body of the seek-failure branch and the braces are not
 * part of this excerpt, so the failure result cannot be stated from here.
 * Presumably this is only compiled where the platform lacks pread() --
 * confirm against the full file.
 */
33 static ssize_t pread(int fd, void *buf, size_t count, off_t offset)
/* the seek must land exactly on `offset`; any other result takes the error branch */
35 if (lseek(fd, offset, SEEK_SET) != offset) {
39 return read(fd, buf, count);
/*
 * File-local pwrite() substitute: positions fd at `offset` with lseek() and
 * then hands `buf`/`count` to write(), returning write()'s result.
 * Because it seeks, it moves the descriptor's file offset.
 * NOTE(review): the body of the seek-failure branch and the braces are not
 * part of this excerpt. Presumably this is only compiled where the platform
 * lacks pwrite() -- confirm against the full file.
 */
44 static ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset)
/* the seek must land exactly on `offset`; any other result takes the error branch */
46 if (lseek(fd, offset, SEEK_SET) != offset) {
50 return write(fd, buf, count);
54 /* check for an out of bounds access - if it is out of bounds then
55 see if the database has been expanded by someone else and expand
57 our view of it to match. Note that "len" is the minimum length needed for the db */
/*
 * Bounds check for an access that needs the database to be at least `len`
 * bytes long.
 * Visible behaviour:
 *  - `len` is first compared against tdb->map_size;
 *  - a TDB_INTERNAL context that fails that comparison yields
 *    TDB_ERRCODE(TDB_ERR_IO, -1);
 *  - otherwise the file is fstat()ed; an fstat() failure or a file shorter
 *    than `len` also yields TDB_ERRCODE(TDB_ERR_IO, -1);
 *  - if the file is large enough the mapping is dropped with tdb_munmap()
 *    and tdb->map_size is set to the file size.
 * NOTE(review): this excerpt omits several lines (the declaration of `st`,
 * the statement guarded by the first `if`, braces, and everything after the
 * map_size update). `probe` is not referenced on any visible line; presumably
 * it controls whether the failures are logged -- confirm against the full file.
 */
59 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
/* fast path: the requested length already fits inside the current map size */
62 if (len <= tdb->map_size)
/* an internal database is bounded by its malloc'd size (see the log text below) */
64 if (tdb->flags & TDB_INTERNAL) {
66 /* Ensure ecode is set for log fn. */
67 tdb->ecode = TDB_ERR_IO;
68 TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
69 (int)len, (int)tdb->map_size));
71 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* ask the filesystem how large the file is now */
74 if (fstat(tdb->fd, &st) == -1) {
75 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* the file itself is shorter than the requested length */
78 if (st.st_size < (size_t)len) {
80 /* Ensure ecode is set for log fn. */
81 tdb->ecode = TDB_ERR_IO;
82 TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
83 (int)len, (int)st.st_size));
85 return TDB_ERRCODE(TDB_ERR_IO, -1);
88 /* Unmap, update size, remap */
89 if (tdb_munmap(tdb) == -1)
90 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* adopt the size reported by fstat() as the new map size */
91 tdb->map_size = st.st_size;
96 /* write a lump of data at a specified offset */
/*
 * Stores `len` bytes from `buf` at offset `off` of the database.
 * A context that is read-only or in a read traversal gets ecode
 * TDB_ERR_RDONLY. The end of the range (off + len) is bounds-checked through
 * the methods table, then the data is either memcpy()ed into the mapping or
 * written with pwrite() on tdb->fd. A pwrite() that does not return exactly
 * `len` is logged and reported as TDB_ERRCODE(TDB_ERR_IO, -1).
 * NOTE(review): the statements that leave the function on the read-only and
 * out-of-bounds paths, the condition that selects memcpy() over pwrite(), and
 * the success return are not visible in this excerpt.
 */
97 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
98 const void *buf, tdb_len_t len)
/* writes are refused on read-only contexts and during read traversals */
104 if (tdb->read_only || tdb->traverse_read) {
105 tdb->ecode = TDB_ERR_RDONLY;
/* make sure the whole target range lies inside the database */
109 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
/* mapped case: copy straight into the mapping at byte offset `off` */
113 memcpy(off + (char *)tdb->map_ptr, buf, len);
/* unmapped case: positional write; anything other than a full write is an error */
114 } else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
115 /* Ensure ecode is set for log fn. */
116 tdb->ecode = TDB_ERR_IO;
117 TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
118 off, len, strerror(errno)));
119 return TDB_ERRCODE(TDB_ERR_IO, -1);
124 /* Endian conversion: we only ever deal with 4 byte quantities */
/*
 * Applies TDB_BYTEREV to each 4-byte word of a buffer, in place.
 * Only size / 4 whole words are processed; a trailing remainder of fewer than
 * four bytes is left untouched.
 * NOTE(review): the declarations of `i` and `p` and the return statement are
 * not visible here; `p` is presumably `buf` viewed as an array of 32-bit
 * words -- confirm against the full file.
 */
125 void *tdb_convert(void *buf, u32 size)
128 for (i = 0; i < size / 4; i++)
129 p[i] = TDB_BYTEREV(p[i]);
134 /* read a lump of data at a specified offset, maybe convert */
/*
 * Fetches `len` bytes at offset `off` into `buf`.
 * The end of the range (off + len) is bounds-checked through the methods
 * table first. The data then comes either from the mapping via memcpy() or
 * from pread() on tdb->fd; a pread() that does not return exactly `len` is
 * logged and reported as TDB_ERRCODE(TDB_ERR_IO, -1).
 * NOTE(review): the test on `cv` is not visible; presumably it gates the
 * tdb_convert() call at the end (callers in this file pass DOCONV() or 0).
 * The out-of-bounds return, the condition selecting memcpy() over pread(),
 * and the success return are also missing from this excerpt.
 */
135 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
136 tdb_len_t len, int cv)
/* make sure the whole source range lies inside the database */
138 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
/* mapped case: copy straight out of the mapping at byte offset `off` */
143 memcpy(buf, off + (char *)tdb->map_ptr, len);
/* unmapped case: positional read; anything other than a full read is an error */
145 ssize_t ret = pread(tdb->fd, buf, len, off);
146 if (ret != (ssize_t)len) {
147 /* Ensure ecode is set for log fn. */
148 tdb->ecode = TDB_ERR_IO;
149 TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d ret=%d (%s) map_size=%d\n",
150 off, len, ret, strerror(errno), tdb->map_size));
151 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* byte-swap the data that was just read, in place */
155 tdb_convert(buf, len);
163 /* do an unlocked scan of the hash table heads to find the next non-zero head. The value
164 will then be confirmed with the lock held */
/*
 * Walks hash-chain indices `h` upward, bounded by tdb->header.hash_size,
 * looking for a chain head whose stored offset is non-zero.
 * Two variants of the scan are visible: one reads the 32-bit head directly
 * out of the mapping, the other goes through tdb_ofs_read() and also stops
 * when that read fails.
 * NOTE(review): the initialisation of `h` (presumably from *chain), the
 * condition choosing between the two loops (presumably whether a mapping
 * exists), the loop bodies, the declaration of `off`, and the write-back to
 * *chain are all missing from this excerpt -- confirm against the full file.
 */
166 static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
/* variant 1: inspect the head word in place inside the mapping */
170 for (;h < tdb->header.hash_size;h++) {
171 if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
/* variant 2: fetch the head through tdb_ofs_read(); a failed read also matches */
177 for (;h < tdb->header.hash_size;h++) {
178 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
/*
 * Drops the memory mapping of the database.
 * TDB_INTERNAL contexts are tested for first (the guarded statement is not
 * visible here); otherwise munmap() is called on tdb->map_ptr for
 * tdb->map_size bytes.
 * NOTE(review): what is done with munmap()'s result `ret`, any condition
 * around the munmap() call, and the function's return values are not visible
 * in this excerpt.
 */
187 int tdb_munmap(struct tdb_context *tdb)
189 if (tdb->flags & TDB_INTERNAL)
194 int ret = munmap(tdb->map_ptr, tdb->map_size);
/*
 * Establishes a MAP_SHARED mapping of tdb->fd covering tdb->map_size bytes
 * and stores it in tdb->map_ptr.
 * TDB_INTERNAL contexts are tested for first (the guarded statement is not
 * visible here), and mmap() is only attempted when TDB_NOMMAP is not set.
 * The mapping is readable, and writable only when the context is not
 * read-only. A MAP_FAILED result is reported through TDB_LOG.
 * NOTE(review): the statements that follow a failed mmap() (other than the
 * log call) and the branch taken when TDB_NOMMAP is set are missing from this
 * excerpt.
 */
203 void tdb_mmap(struct tdb_context *tdb)
205 if (tdb->flags & TDB_INTERNAL)
209 if (!(tdb->flags & TDB_NOMMAP)) {
210 tdb->map_ptr = mmap(NULL, tdb->map_size,
211 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
212 MAP_SHARED|MAP_FILE, tdb->fd, 0);
/* NOTE(review): the next line is the text of a block comment whose delimiters are not in this excerpt */
215 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
218 if (tdb->map_ptr == MAP_FAILED) {
220 TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n",
221 tdb->map_size, strerror(errno)));
231 /* expand a file. we prefer to use ftruncate, as that is what posix
232 says to use for mmap expansion */
/*
 * Grows the file behind tdb->fd from `size` to size + addition bytes.
 * A context that is read-only or in a read traversal gets ecode
 * TDB_ERR_RDONLY. ftruncate() to the new length is tried first; if it fails,
 * a single byte is written with pwrite() at the new last offset instead, and
 * a failure of that write is logged. The added space is then written with a
 * buffer filled with TDB_PAD_BYTE, at most sizeof(buf) bytes per pwrite().
 * NOTE(review): the declarations of `b` and `buf`, the loop around the fill
 * writes, the check on `ret`, and all return statements are not visible in
 * this excerpt.
 */
233 static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
/* the file cannot be grown from a read-only context or during a read traversal */
237 if (tdb->read_only || tdb->traverse_read) {
238 tdb->ecode = TDB_ERR_RDONLY;
/* preferred route: extend the file with ftruncate() */
242 if (ftruncate(tdb->fd, size+addition) == -1) {
/* fallback: extend the file by writing one byte at its new last offset */
244 if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) {
245 TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n",
246 size+addition, strerror(errno)));
251 /* now fill the file with something. This ensures that the
252 file isn't sparse, which would be very bad if we ran out of
253 disk. This must be done with write, not via mmap */
254 memset(buf, TDB_PAD_BYTE, sizeof(buf));
/* write at most one buffer's worth per call, starting at offset `size` */
256 int n = addition>sizeof(buf)?sizeof(buf):addition;
257 int ret = pwrite(tdb->fd, buf, n, size);
259 TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n",
260 n, strerror(errno)));
270 /* expand the database at least size bytes by expanding the underlying
271 file and doing the mmap again if necessary */
/*
 * Visible sequence:
 *  1. take the write lock with tdb_lock(tdb, -1, F_WRLCK);
 *  2. call tdb_oob() in probe mode for map_size + 1;
 *  3. replace `size` by the amount that takes map_size + size*10 up to a
 *     multiple of tdb->page_size;
 *  4. for file-backed contexts, grow the file through the methods table;
 *  5. add `size` to tdb->map_size; for TDB_INTERNAL contexts realloc()
 *     map_ptr to the new size;
 *  6. build a zeroed list_struct with rec_len = size - sizeof(rec) and hand
 *     the new region (starting at map_size - size) to tdb_free();
 *  7. release the lock (two tdb_unlock() calls are visible, presumably one
 *     per exit path).
 * NOTE(review): the declaration of `offset`, the statements guarded by the
 * first `!(flags & TDB_INTERNAL)` test (presumably the unmap), the realloc()
 * failure test, the remap, the goto/label structure and the return values
 * are missing from this excerpt -- confirm against the full file.
 */
272 int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
274 struct list_struct rec;
277 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
278 TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));
282 /* must know about any previous expansions by another process */
283 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
285 /* always make room for at least 10 more records, and round
286 the database up to a multiple of the page size */
287 size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;
289 if (!(tdb->flags & TDB_INTERNAL))
/* NOTE(review): the next three lines are the text of a block comment whose delimiters are not in this excerpt */
293 * We must ensure the file is unmapped before doing this
294 * to ensure consistency with systems like OpenBSD where
295 * writes and mmaps are not consistent.
298 /* expand the file itself */
299 if (!(tdb->flags & TDB_INTERNAL)) {
300 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
304 tdb->map_size += size;
306 if (tdb->flags & TDB_INTERNAL) {
/* keep the old pointer until the realloc() result has been checked */
307 char *new_map_ptr = realloc(tdb->map_ptr, tdb->map_size);
/* undo the size bump made above */
309 tdb->map_size -= size;
312 tdb->map_ptr = new_map_ptr;
/* NOTE(review): the next three lines are the text of a block comment whose delimiters are not in this excerpt */
315 * We must ensure the file is remapped before adding the space
316 * to ensure consistency with systems like OpenBSD where
317 * writes and mmaps are not consistent.
320 /* We're ok if the mmap fails as we'll fallback to read/write */
324 /* form a new freelist record */
325 memset(&rec,'\0',sizeof(rec));
/* the record header itself occupies part of the new space */
326 rec.rec_len = size - sizeof(rec);
328 /* link it into the free list */
/* the new space begins where the database used to end */
329 offset = tdb->map_size - size;
330 if (tdb_free(tdb, offset, &rec) == -1)
333 tdb_unlock(tdb, -1, F_WRLCK);
336 tdb_unlock(tdb, -1, F_WRLCK);
340 /* read/write a tdb_off_t */
/*
 * Reads one tdb_off_t stored at `offset` into *d through the methods table,
 * passing DOCONV() as the convert flag, and returns that call's result.
 */
341 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
343 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/*
 * Writes one tdb_off_t at `offset` through the methods table and returns
 * that call's result. The data written is CONVERT(off), sizeof(*d) bytes.
 * NOTE(review): `off` is declared on a line not present in this excerpt;
 * presumably it is a local copy of *d so the caller's value is not modified
 * by CONVERT -- confirm against the full file.
 */
346 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
349 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
353 /* read a lump of data, allocating the space for it */
/*
 * malloc()s `len` bytes and fills them from the database at `offset` via the
 * methods table's tdb_read, with conversion disabled (cv argument 0).
 * On malloc() failure ecode is set to TDB_ERR_OOM, the failure is logged and
 * TDB_ERRCODE(TDB_ERR_OOM, buf) is returned, where buf is the failed (null)
 * allocation result.
 * NOTE(review): the declaration of `buf`, the zero-length handling announced
 * by the comment below, the handling of a failed read, and the success
 * return are not visible in this excerpt.
 */
354 unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
358 /* some systems don't like zero length malloc */
363 if (!(buf = malloc(len))) {
364 /* Ensure ecode is set for log fn. */
365 tdb->ecode = TDB_ERR_OOM;
366 TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
367 len, strerror(errno)));
368 return TDB_ERRCODE(TDB_ERR_OOM, buf);
/* raw read: 0 is passed as the convert flag */
370 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
377 /* read/write a record */
/*
 * Reads the record header stored at `offset` into *rec (converting per
 * DOCONV()) and validates it.
 * A header that fails TDB_BAD_MAGIC sets ecode to TDB_ERR_CORRUPT, is logged,
 * and yields TDB_ERRCODE(TDB_ERR_CORRUPT, -1). Otherwise the result is that
 * of a bounds check on rec->next + sizeof(*rec), i.e. whether a header-sized
 * region at the `next` offset lies inside the database.
 * NOTE(review): the statement executed when the initial read fails is not
 * visible in this excerpt.
 */
378 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
380 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
382 if (TDB_BAD_MAGIC(rec)) {
383 /* Ensure ecode is set for log fn. */
384 tdb->ecode = TDB_ERR_CORRUPT;
385 TDB_LOG((tdb, 0,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
386 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
/* the record's `next` link must itself point inside the database */
388 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/*
 * Writes the record header *rec at `offset` through the methods table and
 * returns that call's result.
 * The header is first copied into the local `r`, and CONVERT is applied to
 * that copy, so the caller's structure is not what CONVERT operates on.
 */
391 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
393 struct list_struct r = *rec;
394 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/*
 * Method table for this file's I/O routines; tdb_io_init() installs its
 * address in tdb->methods.
 * NOTE(review): the initialiser entries are not present in this excerpt.
 */
397 static const struct tdb_methods io_methods = {
407 /* initialise the default methods table */
/*
 * Points tdb->methods at this file's io_methods table, so that later calls
 * made through tdb->methods use the routines listed there.
 */
409 void tdb_io_init(struct tdb_context *tdb)
411 tdb->methods = &io_methods;