/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell 1999-2005
   Copyright (C) Paul `Rusty' Russell 2000
   Copyright (C) Jeremy Allison 2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
30 #include "tdb_private.h"
/* Local pread(): seeks fd to `offset` with lseek(), then read()s up to
   `count` bytes into `buf` and returns whatever read() returns.
   Presumably a fallback for platforms lacking pread() - the guard is not
   visible in this listing; TODO confirm.
   NOTE(review): this moves fd's file offset and takes two system calls,
   which a native pread() would avoid. */
33 static ssize_t pread(int fd, void *buf, size_t count, off_t offset)
/* the seek failed or landed somewhere else; the body of this branch is
   not shown in this listing */
35 if (lseek(fd, offset, SEEK_SET) != offset) {
39 return read(fd, buf, count);
/* Local pwrite(): seeks fd to `offset` with lseek(), then write()s
   `count` bytes from `buf` and returns whatever write() returns.
   Presumably a fallback for platforms lacking pwrite() - the guard is not
   visible in this listing; TODO confirm.
   NOTE(review): this moves fd's file offset and takes two system calls,
   which a native pwrite() would avoid. */
44 static ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset)
/* the seek failed or landed somewhere else; the body of this branch is
   not shown in this listing */
46 if (lseek(fd, offset, SEEK_SET) != offset) {
50 return write(fd, buf, count);
/* check for an out of bounds access - if it is out of bounds then
   see if the database has been expanded by someone else and expand
   if necessary
   note that "len" is the minimum length needed for the db
*/
/* Bounds check: make sure the database is at least `len` bytes long.
   tdb   - database context; map_size, flags, fd and ecode are used
   len   - minimum size required
   probe - not referenced on any line shown here; presumably it silences
           the error logging - TODO confirm against the full source
   Every failure path shown returns TDB_ERRCODE(TDB_ERR_IO, -1).
   The declaration of `st` and the end of the function are not shown in
   this listing. */
59 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
/* the size we already know about is big enough (the statement taken
   in that case is not shown) */
62 if (len <= tdb->map_size)
/* internal databases are reported as out of bounds straight away,
   without consulting fstat() */
64 if (tdb->flags & TDB_INTERNAL) {
66 /* Ensure ecode is set for log fn. */
67 tdb->ecode = TDB_ERR_IO;
68 TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n",
69 (int)len, (int)tdb->map_size));
71 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* ask for the current size of the underlying file */
74 if (fstat(tdb->fd, &st) == -1) {
75 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* the file really is too short for the requested length.
   NOTE(review): st.st_size is compared against a size_t cast of len; if
   st_size is a signed off_t this is a mixed-sign comparison - confirm */
78 if (st.st_size < (size_t)len) {
80 /* Ensure ecode is set for log fn. */
81 tdb->ecode = TDB_ERR_IO;
82 TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n",
83 (int)len, (int)st.st_size));
85 return TDB_ERRCODE(TDB_ERR_IO, -1);
88 /* Unmap, update size, remap */
89 if (tdb_munmap(tdb) == -1)
90 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* adopt the size reported by fstat(); the remap step named above is
   not shown in this listing */
91 tdb->map_size = st.st_size;
96 /* write a lump of data at a specified offset */
/* Write `len` bytes from `buf` into the database at offset `off`.
   The data is either copied into the mapping with memcpy() or written
   with pwrite() on tdb->fd; the test that selects between the two is
   not shown in this listing.
   A pwrite() that does not transfer exactly `len` bytes sets
   TDB_ERR_IO, logs, and returns TDB_ERRCODE(TDB_ERR_IO, -1). */
97 static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
98 const void *buf, tdb_len_t len)
/* a read-only handle records TDB_ERR_RDONLY (the rest of this branch
   is not shown) */
100 if (tdb->read_only) {
101 tdb->ecode = TDB_ERR_RDONLY;
/* the end of the range, off + len, must lie inside the database.
   NOTE(review): off + len could wrap around if both types are 32-bit
   unsigned - confirm the typedefs */
105 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
109 memcpy(off + (char *)tdb->map_ptr, buf, len);
/* anything other than a full-length write counts as an I/O error */
110 } else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
111 /* Ensure ecode is set for log fn. */
112 tdb->ecode = TDB_ERR_IO;
/* NOTE(review): off and len are passed for %d without a cast, unlike
   the (int) casts used in tdb_oob - confirm they are int-sized */
113 TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n",
114 off, len, strerror(errno)));
115 return TDB_ERRCODE(TDB_ERR_IO, -1);
120 /* Endian conversion: we only ever deal with 4 byte quantities */
/* Apply TDB_BYTEREV to each complete 4-byte word of a buffer.
   buf  - data to convert; `p` presumably aliases it as u32 words (the
          declarations of `p` and `i` are not shown in this listing)
   size - length in bytes
   The value returned is not shown in this listing. */
121 void *tdb_convert(void *buf, u32 size)
/* size / 4 rounds down, so up to three trailing bytes are never touched */
124 for (i = 0; i < size / 4; i++)
125 p[i] = TDB_BYTEREV(p[i]);
130 /* read a lump of data at a specified offset, maybe convert */
/* Read `len` bytes at offset `off` of the database into `buf`.
   tdb - database context
   off - offset to read from
   buf - destination, at least `len` bytes
   len - number of bytes wanted
   cv  - not referenced on any line shown here; presumably it gates the
         tdb_convert() call at the end - TODO confirm
   The data comes from the mapping via memcpy() or from tdb->fd via
   pread(); the test that selects between them is not shown in this
   listing. A pread() that does not return exactly `len` sets
   TDB_ERR_IO, logs, and returns TDB_ERRCODE(TDB_ERR_IO, -1). */
131 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
132 tdb_len_t len, int cv)
/* the end of the range, off + len, must lie inside the database.
   NOTE(review): off + len could wrap around if both types are 32-bit
   unsigned - confirm the typedefs */
134 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
139 memcpy(buf, off + (char *)tdb->map_ptr, len);
141 ssize_t ret = pread(tdb->fd, buf, len, off);
/* short reads are treated the same as outright failures */
142 if (ret != (ssize_t)len) {
143 /* Ensure ecode is set for log fn. */
144 tdb->ecode = TDB_ERR_IO;
/* NOTE(review): ret is an ssize_t but is passed for a %d conversion; if
   TDB_LOG forwards to a printf-style function this mismatches wherever
   ssize_t is wider than int - confirm and cast */
145 TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d ret=%d (%s) map_size=%d\n",
146 off, len, ret, strerror(errno), tdb->map_size));
147 return TDB_ERRCODE(TDB_ERR_IO, -1);
/* converts the buffer that was just filled, in 4-byte words */
151 tdb_convert(buf, len);
/*
  do an unlocked scan of the hash table heads to find the next non-zero head. The value
  will then be confirmed with the lock held
*/
/* Scan hash chain heads, starting from `h`, for the next one that is
   non-zero. Two scans are visible: one reads the heads straight out of
   the mapping, the other goes through tdb_ofs_read().
   The initialisation of `h`, the test that selects a scan, the loop
   exits and the store back through `chain` are not shown in this
   listing. */
162 static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
166 for (;h < tdb->header.hash_size;h++) {
/* reads the head as a raw u32 from the mapping, with no conversion
   applied on this line */
167 if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
173 for (;h < tdb->header.hash_size;h++) {
/* a failed read matches as well as a non-zero head */
174 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
/* Release the current mapping of the database, if any.
   Calls munmap() on map_ptr for map_size bytes; what is done with `ret`
   and the value returned are not shown in this listing. */
183 int tdb_munmap(struct tdb_context *tdb)
/* internal databases are special-cased here (statement not shown) */
185 if (tdb->flags & TDB_INTERNAL)
190 int ret = munmap(tdb->map_ptr, tdb->map_size);
/* Map the first map_size bytes of tdb->fd into memory and store the
   address in tdb->map_ptr. Returns nothing; a failed mmap() is logged
   at level 2. What map_ptr is set to after a failure, or when mmap is
   not used, is not shown in this listing. */
199 void tdb_mmap(struct tdb_context *tdb)
/* internal databases are special-cased here (statement not shown) */
201 if (tdb->flags & TDB_INTERNAL)
/* TDB_NOMMAP suppresses the mmap() attempt */
205 if (!(tdb->flags & TDB_NOMMAP)) {
/* shared mapping from file offset 0; PROT_WRITE is added only when the
   handle is not read-only */
206 tdb->map_ptr = mmap(NULL, tdb->map_size,
207 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
208 MAP_SHARED|MAP_FILE, tdb->fd, 0);
211 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
214 if (tdb->map_ptr == MAP_FAILED) {
216 TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n",
217 tdb->map_size, strerror(errno)));
227 /* expand a file. we prefer to use ftruncate, as that is what posix
228 says to use for mmap expansion */
/* Grow the file behind the database from `size` to `size + addition`
   bytes and write TDB_PAD_BYTE into the new region.
   tdb      - database context; fd, read_only and ecode are used
   size     - current length of the file
   addition - number of bytes to add
   The declarations of `b` and `buf`, the header of the fill loop, the
   failure test after the fill write and all return statements are not
   shown in this listing. */
229 static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
/* a read-only handle records TDB_ERR_RDONLY (the rest of this branch
   is not shown) */
233 if (tdb->read_only) {
234 tdb->ecode = TDB_ERR_RDONLY;
/* if ftruncate() fails, fall back to writing a single byte at the new
   last offset */
238 if (ftruncate(tdb->fd, size+addition) == -1) {
240 if (pwrite(tdb->fd, &b, 1, (size+addition) - 1) != 1) {
241 TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n",
242 size+addition, strerror(errno)));
247 /* now fill the file with something. This ensures that the
248 file isn't sparse, which would be very bad if we ran out of
249 disk. This must be done with write, not via mmap */
250 memset(buf, TDB_PAD_BYTE, sizeof(buf));
/* write at most one buffer's worth per call.
   NOTE(review): n and ret are int while addition is a tdb_off_t and
   pwrite() returns ssize_t; n is bounded by sizeof(buf) so this looks
   harmless - confirm */
252 int n = addition>sizeof(buf)?sizeof(buf):addition;
253 int ret = pwrite(tdb->fd, buf, n, size);
255 TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n",
256 n, strerror(errno)));
266 /* expand the database at least size bytes by expanding the underlying
267 file and doing the mmap again if necessary */
/* Grow the database by at least `size` bytes and hand the new space to
   the free list.
   tdb  - database context
   size - minimum number of extra bytes wanted; it is overwritten below
          with the amount actually added
   Steps visible here: take the lock, pick up any growth made elsewhere,
   round the request up, enlarge the file (or realloc() the buffer for
   an internal database), then register the new region through
   tdb_free() and unlock.
   The declaration of `offset`, the unmap/remap calls, the failure test
   after realloc(), the error labels and all return statements are not
   shown in this listing. */
268 int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
270 struct list_struct rec;
/* hold the write lock (list argument -1) for the whole expansion */
273 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
274 TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));
278 /* must know about any previous expansions by another process */
/* called with probe set to 1; the result is deliberately not checked
   on this line */
279 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
281 /* always make room for at least 10 more records, and round
282 the database up to a multiple of the page size */
/* NOTE(review): size*10 can overflow tdb_off_t for very large requests
   - confirm callers bound it */
283 size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;
285 if (!(tdb->flags & TDB_INTERNAL))
289 * We must ensure the file is unmapped before doing this
290 * to ensure consistency with systems like OpenBSD where
291 * writes and mmaps are not consistent.
294 /* expand the file itself */
295 if (!(tdb->flags & TDB_INTERNAL)) {
296 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
/* from here on map_size is the new, larger size */
300 tdb->map_size += size;
302 if (tdb->flags & TDB_INTERNAL) {
/* the result goes into a temporary, so map_ptr is left unchanged until
   the assignment below */
303 char *new_map_ptr = realloc(tdb->map_ptr, tdb->map_size);
/* undoes the size bump; presumably only on the realloc failure path
   (test not shown) */
305 tdb->map_size -= size;
308 tdb->map_ptr = new_map_ptr;
311 * We must ensure the file is remapped before adding the space
312 * to ensure consistency with systems like OpenBSD where
313 * writes and mmaps are not consistent.
316 /* We're ok if the mmap fails as we'll fallback to read/write */
320 /* form a new freelist record */
321 memset(&rec,'\0',sizeof(rec));
/* the record header itself lives inside the added region */
322 rec.rec_len = size - sizeof(rec);
324 /* link it into the free list */
/* start of the region that was just added */
325 offset = tdb->map_size - size;
326 if (tdb_free(tdb, offset, &rec) == -1)
/* two unlock calls are visible; presumably one belongs to the success
   path and one to the failure path (labels not shown) */
329 tdb_unlock(tdb, -1, F_WRLCK);
332 tdb_unlock(tdb, -1, F_WRLCK);
336 /* read/write a tdb_off_t */
/* Read one tdb_off_t stored at `offset` into *d.
   Forwards to the tdb_read method with a length of sizeof(*d) and
   DOCONV() as the conversion flag, and returns that method's result. */
337 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
339 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
/* Write one tdb_off_t at `offset`.
   Forwards CONVERT(off) with a length of sizeof(*d) to the tdb_write
   method and returns its result. The declaration of `off` is not shown
   in this listing; presumably it is a local copy of *d so the caller's
   value is left alone - TODO confirm. */
342 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
345 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
349 /* read a lump of data, allocating the space for it */
/* malloc() a buffer of `len` bytes and fill it from the database at
   `offset`, with no conversion (cv argument 0).
   On allocation failure: sets TDB_ERR_OOM, logs, and returns
   TDB_ERRCODE(TDB_ERR_OOM, buf), where buf is NULL at that point.
   The declaration of `buf`, the handling of a failed read and the
   success return are not shown in this listing; presumably the caller
   free()s the result - TODO confirm. */
350 unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
/* NOTE(review): malloc(0) may legitimately return NULL, so a zero `len`
   can be reported as out-of-memory here - confirm callers never pass 0 */
354 if (!(buf = malloc(len))) {
355 /* Ensure ecode is set for log fn. */
356 tdb->ecode = TDB_ERR_OOM;
357 TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n",
358 len, strerror(errno)));
359 return TDB_ERRCODE(TDB_ERR_OOM, buf);
361 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
368 /* read/write a record */
/* Read the record header stored at `offset` into *rec and sanity-check
   it.
   A header that fails TDB_BAD_MAGIC sets TDB_ERR_CORRUPT, logs, and
   returns TDB_ERRCODE(TDB_ERR_CORRUPT, -1). Otherwise the result is
   that of the final tdb_oob call.
   The statement taken when the read itself fails is not shown in this
   listing. */
369 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
371 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
373 if (TDB_BAD_MAGIC(rec)) {
374 /* Ensure ecode is set for log fn. */
375 tdb->ecode = TDB_ERR_CORRUPT;
376 TDB_LOG((tdb, 0,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
377 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
/* also require that a header placed at rec->next would lie inside the
   database.
   NOTE(review): rec->next + sizeof(*rec) could wrap around for a
   corrupt `next` - confirm the width of tdb_off_t */
379 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
/* Write the record header *rec at `offset`.
   Forwards CONVERT(r) with a length of sizeof(r) to the tdb_write
   method and returns its result. */
382 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
/* work on a local copy; presumably CONVERT may modify its argument and
   the caller's record must stay untouched - TODO confirm */
384 struct list_struct r = *rec;
385 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
/* Table binding the tdb_methods hooks to the implementations in this
   file; it is installed by tdb_io_init().
   NOTE(review): functions here call tdb->methods->tdb_oob, but no
   tdb_oob entry appears among the lines shown in this listing - confirm
   it is set in the full source. */
388 static const struct tdb_methods io_methods = {
389 .tdb_read = tdb_read,
390 .tdb_write = tdb_write,
391 .next_hash_chain = tdb_next_hash_chain,
393 .tdb_expand_file = tdb_expand_file,
/* tdb_brlock is not defined in this listing; presumably it lives in
   another file */
394 .tdb_brlock = tdb_brlock
/*
  initialise the default methods table
*/
/* Point tdb->methods at this file's io_methods table. Returns nothing. */
400 void tdb_io_init(struct tdb_context *tdb)
402 tdb->methods = &io_methods;