source4/lib/tdb/common/freelist.c

   1  /*
   2    Unix SMB/CIFS implementation.
   3
   4    trivial database library
   5
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 2 of the License, or (at your option) any later version.
  18
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, write to the Free Software
  26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27 */
  28
  29 #include "tdb_private.h"
  30
  31 /* read a freelist record and check for simple errors */
  32 static int rec_free_read(struct tdb_context *tdb, tdb_off_t off, struct list_struct *rec)
  33 {
  34         if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1)
  35                 return -1;
  36
  37         if (rec->magic == TDB_MAGIC) {
  38                 /* this happens when a app is showdown while deleting a record - we should
  39                    not completely fail when this happens */
  40                 TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n",
  41                          rec->magic, off));
  42                 rec->magic = TDB_FREE_MAGIC;
  43                 if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1)
  44                         return -1;
  45         }
  46
  47         if (rec->magic != TDB_FREE_MAGIC) {
  48                 /* Ensure ecode is set for log fn. */
  49                 tdb->ecode = TDB_ERR_CORRUPT;
  50                 TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n",
  51                            rec->magic, off));
  52                 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
  53         }
  54         if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0)
  55                 return -1;
  56         return 0;
  57 }
  58
  59
  60
  61 /* Remove an element from the freelist.  Must have alloc lock. */
  62 static int remove_from_freelist(struct tdb_context *tdb, tdb_off_t off, tdb_off_t next)
  63 {
  64         tdb_off_t last_ptr, i;
  65
  66         /* read in the freelist top */
  67         last_ptr = FREELIST_TOP;
  68         while (tdb_ofs_read(tdb, last_ptr, &i) != -1 && i != 0) {
  69                 if (i == off) {
  70                         /* We've found it! */
  71                         return tdb_ofs_write(tdb, last_ptr, &next);
  72                 }
  73                 /* Follow chain (next offset is at start of record) */
  74                 last_ptr = i;
  75         }
  76         TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off));
  77         return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
  78 }
  79
  80
  81 /* update a record tailer (must hold allocation lock) */
  82 static int update_tailer(struct tdb_context *tdb, tdb_off_t offset,
  83                          const struct list_struct *rec)
  84 {
  85         tdb_off_t totalsize;
  86
  87         /* Offset of tailer from record header */
  88         totalsize = sizeof(*rec) + rec->rec_len;
  89         return tdb_ofs_write(tdb, offset + totalsize - sizeof(tdb_off_t),
  90                          &totalsize);
  91 }
  92
  93 /* Add an element into the freelist. Merge adjacent records if
  94    neccessary. */
  95 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
  96 {
  97         tdb_off_t right, left;
  98
  99         /* Allocation and tailer lock */
 100         if (tdb_lock(tdb, -1, F_WRLCK) != 0)
 101                 return -1;
 102
 103         /* set an initial tailer, so if we fail we don't leave a bogus record */
 104         if (update_tailer(tdb, offset, rec) != 0) {
 105                 TDB_LOG((tdb, 0, "tdb_free: upfate_tailer failed!\n"));
 106                 goto fail;
 107         }
 108
 109         /* Look right first (I'm an Australian, dammit) */
 110         right = offset + sizeof(*rec) + rec->rec_len;
 111         if (right + sizeof(*rec) <= tdb->map_size) {
 112                 struct list_struct r;
 113
 114                 if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) {
 115                         TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right));
 116                         goto left;
 117                 }
 118
 119                 /* If it's free, expand to include it. */
 120                 if (r.magic == TDB_FREE_MAGIC) {
 121                         if (remove_from_freelist(tdb, right, r.next) == -1) {
 122                                 TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right));
 123                                 goto left;
 124                         }
 125                         rec->rec_len += sizeof(r) + r.rec_len;
 126                 }
 127         }
 128
 129 left:
 130         /* Look left */
 131         left = offset - sizeof(tdb_off_t);
 132         if (left > TDB_DATA_START(tdb->header.hash_size)) {
 133                 struct list_struct l;
 134                 tdb_off_t leftsize;
 135
 136                 /* Read in tailer and jump back to header */
 137                 if (tdb_ofs_read(tdb, left, &leftsize) == -1) {
 138                         TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left));
 139                         goto update;
 140                 }
 141                 left = offset - leftsize;
 142
 143                 /* Now read in record */
 144                 if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) {
 145                         TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize));
 146                         goto update;
 147                 }
 148
 149                 /* If it's free, expand to include it. */
 150                 if (l.magic == TDB_FREE_MAGIC) {
 151                         if (remove_from_freelist(tdb, left, l.next) == -1) {
 152                                 TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left));
 153                                 goto update;
 154                         } else {
 155                                 offset = left;
 156                                 rec->rec_len += leftsize;
 157                         }
 158                 }
 159         }
 160
 161 update:
 162         if (update_tailer(tdb, offset, rec) == -1) {
 163                 TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset));
 164                 goto fail;
 165         }
 166
 167         /* Now, prepend to free list */
 168         rec->magic = TDB_FREE_MAGIC;
 169
 170         if (tdb_ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||
 171             tdb_rec_write(tdb, offset, rec) == -1 ||
 172             tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) {
 173                 TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset));
 174                 goto fail;
 175         }
 176
 177         /* And we're done. */
 178         tdb_unlock(tdb, -1, F_WRLCK);
 179         return 0;
 180
 181  fail:
 182         tdb_unlock(tdb, -1, F_WRLCK);
 183         return -1;
 184 }
 185
 186
 187 /*
 188    the core of tdb_allocate - called when we have decided which
 189    free list entry to use
 190  */
 191 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, tdb_len_t length, tdb_off_t rec_ptr,
 192                                 struct list_struct *rec, tdb_off_t last_ptr)
 193 {
 194         struct list_struct newrec;
 195         tdb_off_t newrec_ptr;
 196
 197         memset(&newrec, '\0', sizeof(newrec));
 198
 199         /* found it - now possibly split it up  */
 200         if (rec->rec_len > length + MIN_REC_SIZE) {
 201                 /* Length of left piece */
 202                 length = TDB_ALIGN(length, TDB_ALIGNMENT);
 203
 204                 /* Right piece to go on free list */
 205                 newrec.rec_len = rec->rec_len - (sizeof(*rec) + length);
 206                 newrec_ptr = rec_ptr + sizeof(*rec) + length;
 207
 208                 /* And left record is shortened */
 209                 rec->rec_len = length;
 210         } else {
 211                 newrec_ptr = 0;
 212         }
 213
 214         /* Remove allocated record from the free list */
 215         if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1) {
 216                 return 0;
 217         }
 218
 219         /* Update header: do this before we drop alloc
 220            lock, otherwise tdb_free() might try to
 221            merge with us, thinking we're free.
 222            (Thanks Jeremy Allison). */
 223         rec->magic = TDB_MAGIC;
 224         if (tdb_rec_write(tdb, rec_ptr, rec) == -1) {
 225                 return 0;
 226         }
 227
 228         /* Did we create new block? */
 229         if (newrec_ptr) {
 230                 /* Update allocated record tailer (we
 231                    shortened it). */
 232                 if (update_tailer(tdb, rec_ptr, rec) == -1) {
 233                         return 0;
 234                 }
 235
 236                 /* Free new record */
 237                 if (tdb_free(tdb, newrec_ptr, &newrec) == -1) {
 238                         return 0;
 239                 }
 240         }
 241
 242         /* all done - return the new record offset */
 243         return rec_ptr;
 244 }
 245
 246 /* allocate some space from the free list. The offset returned points
 247    to a unconnected list_struct within the database with room for at
 248    least length bytes of total data
 249
 250    0 is returned if the space could not be allocated
 251  */
 252 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct list_struct *rec)
 253 {
 254         tdb_off_t rec_ptr, last_ptr, newrec_ptr;
 255         struct {
 256                 tdb_off_t rec_ptr, last_ptr;
 257                 tdb_len_t rec_len;
 258         } bestfit;
 259
 260         if (tdb_lock(tdb, -1, F_WRLCK) == -1)
 261                 return 0;
 262
 263         /* Extra bytes required for tailer */
 264         length += sizeof(tdb_off_t);
 265
 266  again:
 267         last_ptr = FREELIST_TOP;
 268
 269         /* read in the freelist top */
 270         if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)
 271                 goto fail;
 272
 273         bestfit.rec_ptr = 0;
 274
 275         /*
 276            this is a best fit allocation strategy. Originally we used
 277            a first fit strategy, but it suffered from massive fragmentation
 278            issues when faced with a slowly increasing record size.
 279          */
 280         while (rec_ptr) {
 281                 if (rec_free_read(tdb, rec_ptr, rec) == -1) {
 282                         goto fail;
 283                 }
 284
 285                 if (rec->rec_len >= length) {
 286                         if (bestfit.rec_ptr == 0 ||
 287                             rec->rec_len < bestfit.rec_len) {
 288                                 bestfit.rec_len = rec->rec_len;
 289                                 bestfit.rec_ptr = rec_ptr;
 290                                 bestfit.last_ptr = last_ptr;
 291                                 /* consider a fit to be good enough if
 292                                    we aren't wasting more than half
 293                                    the space */
 294                                 if (bestfit.rec_len < 2*length) {
 295                                         break;
 296                                 }
 297                         }
 298                 }
 299
 300                 /* move to the next record */
 301                 last_ptr = rec_ptr;
 302                 rec_ptr = rec->next;
 303         }
 304
 305         if (bestfit.rec_ptr != 0) {
 306                 if (rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) {
 307                         goto fail;
 308                 }
 309
 310                 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, rec, bestfit.last_ptr);
 311                 tdb_unlock(tdb, -1, F_WRLCK);
 312                 return newrec_ptr;
 313         }
 314
 315         /* we didn't find enough space. See if we can expand the
 316            database and if we can then try again */
 317         if (tdb_expand(tdb, length + sizeof(*rec)) == 0)
 318                 goto again;
 319  fail:
 320         tdb_unlock(tdb, -1, F_WRLCK);
 321         return 0;
 322 }
 323