source3/locking/posix.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Locking functions
   4    Copyright (C) Jeremy Allison 1992-2000
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20    Revision History:
  21
  22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  23 */
  24
  25 #include "includes.h"
  26
/*
 * The POSIX locking database handle. Records in this tdb are arrays of
 * struct posix_lock, looked up by a dev/inode key.
 */

static TDB_CONTEXT *posix_lock_tdb;

/*
 * The pending close database handle. Records in this tdb are arrays of
 * int file descriptors, looked up by a dev/inode key.
 */

static TDB_CONTEXT *posix_pending_close_tdb;
  38
/*
 * The data in POSIX lock records is an unsorted linear array of these
 * records.  It is unnecessary to store the count as tdb provides the
 * size of the record.
 *
 * The struct is stored byte-for-byte in the tdb, so the field order and
 * types define the record layout.
 */

struct posix_lock {
	int fd;			/* fd the lock was recorded against (fsp->fd). */
	SMB_OFF_T start;	/* Start offset of the locked range. */
	SMB_OFF_T size;		/* Length of the locked range. */
	int lock_type;		/* fcntl-style lock type, e.g. F_RDLCK. */
};
  51
  52 /*
  53  * The data in POSIX pending close records is an unsorted linear array of int
  54  * records.  It is unnecessary to store the count as tdb provides the
  55  * size of the record.
  56  */
  57
/*
 * The key used in both the POSIX databases: a file is identified by
 * its device/inode pair.
 */

struct posix_lock_key {
	SMB_DEV_T device;	/* Device number of the file. */
	SMB_INO_T inode;	/* Inode number of the file. */
};
  64
  65 /*******************************************************************
  66  Form a static locking key for a dev/inode pair.
  67 ******************************************************************/
  68
  69 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  70 {
  71         static struct posix_lock_key key;
  72         TDB_DATA kbuf;
  73
  74         memset(&key, '\0', sizeof(key));
  75         key.device = dev;
  76         key.inode = inode;
  77         kbuf.dptr = (char *)&key;
  78         kbuf.dsize = sizeof(key);
  79         return kbuf;
  80 }
  81
  82 /*******************************************************************
  83  Convenience function to get a key from an fsp.
  84 ******************************************************************/
  85
  86 static TDB_DATA locking_key_fsp(files_struct *fsp)
  87 {
  88         return locking_key(fsp->dev, fsp->inode);
  89 }
  90
  91 /****************************************************************************
  92  Add an fd to the pending close tdb.
  93 ****************************************************************************/
  94
  95 static BOOL add_fd_to_close_entry(files_struct *fsp)
  96 {
  97         TDB_DATA kbuf = locking_key_fsp(fsp);
  98         TDB_DATA dbuf;
  99         char *tp;
 100
 101         dbuf.dptr = NULL;
 102
 103         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 104
 105         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
 106         if (!tp) {
 107                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 108                 SAFE_FREE(dbuf.dptr);
 109                 return False;
 110         } else
 111                 dbuf.dptr = tp;
 112
 113         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
 114         dbuf.dsize += sizeof(int);
 115
 116         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 117                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 118         }
 119
 120         SAFE_FREE(dbuf.dptr);
 121         return True;
 122 }
 123
 124 /****************************************************************************
 125  Remove all fd entries for a specific dev/inode pair from the tdb.
 126 ****************************************************************************/
 127
 128 static void delete_close_entries(files_struct *fsp)
 129 {
 130         TDB_DATA kbuf = locking_key_fsp(fsp);
 131
 132         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 133                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 134 }
 135
 136 /****************************************************************************
 137  Get the array of POSIX pending close records for an open fsp. Caller must
 138  free. Returns number of entries.
 139 ****************************************************************************/
 140
 141 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 142 {
 143         TDB_DATA kbuf = locking_key_fsp(fsp);
 144         TDB_DATA dbuf;
 145         size_t count = 0;
 146
 147         *entries = NULL;
 148         dbuf.dptr = NULL;
 149
 150         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 151
 152     if (!dbuf.dptr) {
 153                 return 0;
 154         }
 155
 156         *entries = (int *)dbuf.dptr;
 157         count = (size_t)(dbuf.dsize / sizeof(int));
 158
 159         return count;
 160 }
 161
 162 /****************************************************************************
 163  Get the array of POSIX locks for an fsp. Caller must free. Returns
 164  number of entries.
 165 ****************************************************************************/
 166
 167 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 168 {
 169         TDB_DATA kbuf = locking_key_fsp(fsp);
 170         TDB_DATA dbuf;
 171         size_t count = 0;
 172
 173         *entries = NULL;
 174
 175         dbuf.dptr = NULL;
 176
 177         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 178
 179     if (!dbuf.dptr) {
 180                 return 0;
 181         }
 182
 183         *entries = (struct posix_lock *)dbuf.dptr;
 184         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 185
 186         return count;
 187 }
 188
 189 /****************************************************************************
 190  Deal with pending closes needed by POSIX locking support.
 191  Note that posix_locking_close_file() is expected to have been called
 192  to delete all locks on this fsp before this function is called.
 193 ****************************************************************************/
 194
 195 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 196 {
 197         int saved_errno = 0;
 198         int ret;
 199         size_t count, i;
 200         struct posix_lock *entries = NULL;
 201         int *fd_array = NULL;
 202         BOOL locks_on_other_fds = False;
 203
 204         if (!lp_posix_locking(SNUM(conn))) {
 205                 /*
 206                  * No POSIX to worry about, just close.
 207                  */
 208                 ret = conn->vfs_ops.close(fsp,fsp->fd);
 209                 fsp->fd = -1;
 210                 return ret;
 211         }
 212
 213         /*
 214          * Get the number of outstanding POSIX locks on this dev/inode pair.
 215          */
 216
 217         count = get_posix_lock_entries(fsp, &entries);
 218
 219         /*
 220          * Check if there are any outstanding locks belonging to
 221          * other fd's. This should never be the case if posix_locking_close_file()
 222          * has been called first, but it never hurts to be *sure*.
 223          */
 224
 225         for (i = 0; i < count; i++) {
 226                 if (entries[i].fd != fsp->fd) {
 227                         locks_on_other_fds = True;
 228                         break;
 229                 }
 230         }
 231
 232         if (locks_on_other_fds) {
 233
 234                 /*
 235                  * There are outstanding locks on this dev/inode pair on other fds.
 236                  * Add our fd to the pending close tdb and set fsp->fd to -1.
 237                  */
 238
 239                 if (!add_fd_to_close_entry(fsp)) {
 240                         SAFE_FREE(entries);
 241                         return False;
 242                 }
 243
 244                 SAFE_FREE(entries);
 245                 fsp->fd = -1;
 246                 return 0;
 247         }
 248
 249         SAFE_FREE(entries);
 250
 251         /*
 252          * No outstanding POSIX locks. Get the pending close fd's
 253          * from the tdb and close them all.
 254          */
 255
 256         count = get_posix_pending_close_entries(fsp, &fd_array);
 257
 258         if (count) {
 259                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 260
 261                 for(i = 0; i < count; i++) {
 262                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
 263                                 saved_errno = errno;
 264                         }
 265                 }
 266
 267                 /*
 268                  * Delete all fd's stored in the tdb
 269                  * for this dev/inode pair.
 270                  */
 271
 272                 delete_close_entries(fsp);
 273         }
 274
 275         SAFE_FREE(fd_array);
 276
 277         /*
 278          * Finally close the fd associated with this fsp.
 279          */
 280
 281         ret = conn->vfs_ops.close(fsp,fsp->fd);
 282
 283         if (saved_errno != 0) {
 284         errno = saved_errno;
 285                 ret = -1;
 286     }
 287
 288         fsp->fd = -1;
 289
 290         return ret;
 291 }
 292
 293 /****************************************************************************
 294  Debugging aid :-).
 295 ****************************************************************************/
 296
 297 static const char *posix_lock_type_name(int lock_type)
 298 {
 299         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
 300 }
 301
 302 /****************************************************************************
 303  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 304  then the POSIX fcntl lock fails.
 305 ****************************************************************************/
 306
 307 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
 308 {
 309         TDB_DATA kbuf = locking_key_fsp(fsp);
 310         TDB_DATA dbuf;
 311         struct posix_lock *locks;
 312         size_t count;
 313
 314         dbuf.dptr = NULL;
 315
 316         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 317
 318         if (!dbuf.dptr) {
 319                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
 320                 goto fail;
 321         }
 322
 323         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 324         locks = (struct posix_lock *)dbuf.dptr;
 325
 326         if (count == 1) {
 327                 tdb_delete(posix_lock_tdb, kbuf);
 328         } else {
 329                 if (entry < count-1) {
 330                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
 331                 }
 332                 dbuf.dsize -= sizeof(*locks);
 333                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 334         }
 335
 336         SAFE_FREE(dbuf.dptr);
 337
 338         return True;
 339
 340  fail:
 341     SAFE_FREE(dbuf.dptr);
 342     return False;
 343 }
 344
 345 /****************************************************************************
 346  Add an entry into the POSIX locking tdb. We return the index number of the
 347  added lock (used in case we need to delete *exactly* this entry). Returns
 348  False on fail, True on success.
 349 ****************************************************************************/
 350
 351 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
 352 {
 353         TDB_DATA kbuf = locking_key_fsp(fsp);
 354         TDB_DATA dbuf;
 355         struct posix_lock pl;
 356         char *tp;
 357
 358         dbuf.dptr = NULL;
 359
 360         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 361
 362         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
 363
 364         /*
 365          * Add new record.
 366          */
 367
 368         pl.fd = fsp->fd;
 369         pl.start = start;
 370         pl.size = size;
 371         pl.lock_type = lock_type;
 372
 373         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
 374         if (!tp) {
 375                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 376                 goto fail;
 377         } else
 378                 dbuf.dptr = tp;
 379
 380         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
 381         dbuf.dsize += sizeof(pl);
 382
 383         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 384                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 385                 goto fail;
 386         }
 387
 388     SAFE_FREE(dbuf.dptr);
 389
 390         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
 391                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
 392                         (double)fsp->dev, (double)fsp->inode ));
 393
 394     return True;
 395
 396  fail:
 397     SAFE_FREE(dbuf.dptr);
 398     return False;
 399 }
 400
 401 /****************************************************************************
 402  Calculate if locks have any overlap at all.
 403 ****************************************************************************/
 404
 405 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
 406 {
 407         if (start1 >= start2 && start1 <= start2 + size2)
 408                 return True;
 409
 410         if (start1 < start2 && start1 + size1 > start2)
 411                 return True;
 412
 413         return False;
 414 }
 415
 416 /****************************************************************************
 417  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 418  deleted and the number of records that are overlapped by this one, or -1 on error.
 419 ****************************************************************************/
 420
 421 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 422 {
 423         TDB_DATA kbuf = locking_key_fsp(fsp);
 424         TDB_DATA dbuf;
 425         struct posix_lock *locks;
 426         size_t i, count;
 427         BOOL found = False;
 428         int num_overlapping_records = 0;
 429
 430         dbuf.dptr = NULL;
 431
 432         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 433
 434         if (!dbuf.dptr) {
 435                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 436                 goto fail;
 437         }
 438
 439         /* There are existing locks - find a match. */
 440         locks = (struct posix_lock *)dbuf.dptr;
 441         count = (size_t)(dbuf.dsize / sizeof(*locks));
 442
 443         /*
 444          * Search for and delete the first record that matches the
 445          * unlock criteria.
 446          */
 447
 448         for (i=0; i<count; i++) {
 449                 struct posix_lock *entry = &locks[i];
 450
 451                 if (entry->fd == fsp->fd &&
 452                         entry->start == start &&
 453                         entry->size == size) {
 454
 455                         /* Make a copy if requested. */
 456                         if (pl)
 457                                 *pl = *entry;
 458
 459                         /* Found it - delete it. */
 460                         if (count == 1) {
 461                                 tdb_delete(posix_lock_tdb, kbuf);
 462                         } else {
 463                                 if (i < count-1) {
 464                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
 465                                 }
 466                                 dbuf.dsize -= sizeof(*locks);
 467                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 468                         }
 469                         count--;
 470                         found = True;
 471                         break;
 472                 }
 473         }
 474
 475         if (!found)
 476                 goto fail;
 477
 478         /*
 479          * Count the number of entries that are
 480          * overlapped by this unlock request.
 481          */
 482
 483         for (i = 0; i < count; i++) {
 484                 struct posix_lock *entry = &locks[i];
 485
 486                 if (fsp->fd == entry->fd &&
 487                         does_lock_overlap( start, size, entry->start, entry->size))
 488                                 num_overlapping_records++;
 489         }
 490
 491         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 492                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
 493                                 (unsigned int)num_overlapping_records ));
 494
 495     SAFE_FREE(dbuf.dptr);
 496
 497         return num_overlapping_records;
 498
 499  fail:
 500     SAFE_FREE(dbuf.dptr);
 501     return -1;
 502 }
 503
 504 /****************************************************************************
 505  Utility function to map a lock type correctly depending on the open
 506  mode of a file.
 507 ****************************************************************************/
 508
 509 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 510 {
 511         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 512                 /*
 513                  * Many UNIX's cannot get a write lock on a file opened read-only.
 514                  * Win32 locking semantics allow this.
 515                  * Do the best we can and attempt a read-only lock.
 516                  */
 517                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 518                 return F_RDLCK;
 519         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
 520                 /*
 521                  * Ditto for read locks on write only files.
 522                  */
 523                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 524                 return F_WRLCK;
 525         }
 526
 527   /*
 528    * This return should be the most normal, as we attempt
 529    * to always open files read/write.
 530    */
 531
 532   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 533 }
 534
 535 /****************************************************************************
 536  Check to see if the given unsigned lock range is within the possible POSIX
 537  range. Modifies the given args to be in range if possible, just returns
 538  False if not.
 539 ****************************************************************************/
 540
 541 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 542                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 543 {
 544         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
 545         SMB_OFF_T count = (SMB_OFF_T)u_count;
 546
 547         /*
 548          * For the type of system we are, attempt to
 549          * find the maximum positive lock offset as an SMB_OFF_T.
 550          */
 551
 552 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 553
 554         /*
 555          * In this case SMB_OFF_T is 64 bits,
 556          * and the underlying system can handle 64 bit signed locks.
 557          */
 558
 559     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 560     SMB_OFF_T mask = (mask2<<1);
 561     SMB_OFF_T max_positive_lock_offset = ~mask;
 562
 563 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 564
 565         /*
 566          * In this case either SMB_OFF_T is 32 bits,
 567          * or the underlying system cannot handle 64 bit signed locks.
 568          * All offsets & counts must be 2^31 or less.
 569          */
 570
 571     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
 572
 573 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 574
 575         /*
 576          * POSIX locks of length zero mean lock to end-of-file.
 577          * Win32 locks of length zero are point probes. Ignore
 578          * any Win32 locks of length zero. JRA.
 579          */
 580
 581         if (count == (SMB_OFF_T)0) {
 582                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 583                 return False;
 584         }
 585
 586         /*
 587          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 588          * ignore this lock.
 589          */
 590
 591         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
 592                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 593                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
 594                 return False;
 595         }
 596
 597         /*
 598          * We must truncate the offset and count to less than max_positive_lock_offset.
 599          */
 600
 601         offset &= max_positive_lock_offset;
 602         count &= max_positive_lock_offset;
 603
 604
 605         /*
 606          * Deal with a very common case of count of all ones.
 607          * (lock entire file).
 608          */
 609
 610         if(count == (SMB_OFF_T)-1)
 611                 count = max_positive_lock_offset;
 612
 613         /*
 614          * Truncate count to end at max lock offset.
 615          */
 616
 617         if (offset + count < 0 || offset + count > max_positive_lock_offset)
 618                 count = max_positive_lock_offset - offset;
 619
 620         /*
 621          * If we ate all the count, ignore this lock.
 622          */
 623
 624         if (count == 0) {
 625                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 626                                 (double)u_offset, (double)u_count ));
 627                 return False;
 628         }
 629
 630         /*
 631          * The mapping was successful.
 632          */
 633
 634         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 635                         (double)offset, (double)count ));
 636
 637         *offset_out = offset;
 638         *count_out = count;
 639
 640         return True;
 641 }
 642
 643 /****************************************************************************
 644  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 645  broken NFS implementations.
 646 ****************************************************************************/
 647
 648 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 649 {
 650         int ret;
 651         struct connection_struct *conn = fsp->conn;
 652
 653         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
 654
 655         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 656
 657         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 658
 659                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 660                                         (double)offset,(double)count));
 661                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 662                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
 663
 664                 /*
 665                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 666                  * 32 bit NFS mounted filesystems. Just ignore it.
 667                  */
 668
 669                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
 670                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 671                         return True;
 672                 }
 673
 674                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
 675                         /* 32 bit NFS file system, retry with smaller offset */
 676                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 677                         errno = 0;
 678                         count &= 0x7fffffff;
 679                         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 680                 }
 681         }
 682
 683         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 684
 685         return ret;
 686 }
 687
 688 /****************************************************************************
 689  POSIX function to see if a file region is locked. Returns True if the
 690  region is locked, False otherwise.
 691 ****************************************************************************/
 692
 693 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 694 {
 695         SMB_OFF_T offset;
 696         SMB_OFF_T count;
 697         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 698
 699         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 700                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 701
 702         /*
 703          * If the requested lock won't fit in the POSIX range, we will
 704          * never set it, so presume it is not locked.
 705          */
 706
 707         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 708                 return False;
 709
 710         /*
 711          * Note that most UNIX's can *test* for a write lock on
 712          * a read-only fd, just not *set* a write lock on a read-only
 713          * fd. So we don't need to use map_lock_type here.
 714          */
 715
 716         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 717 }
 718
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 */

struct lock_list {
	struct lock_list *next;	/* Next range in the list, or NULL. */
	struct lock_list *prev;	/* Previous range in the list. */
	SMB_OFF_T start;	/* Start offset of this range. */
	SMB_OFF_T size;		/* Length of this range. */
};
 730
 731 /****************************************************************************
 732  Create a list of lock ranges that don't overlap a given range. Used in calculating
 733  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 734  understand it :-).
 735 ****************************************************************************/
 736
 737 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
 738 {
 739         TDB_DATA kbuf = locking_key_fsp(fsp);
 740         TDB_DATA dbuf;
 741         struct posix_lock *locks;
 742         size_t num_locks, i;
 743
 744         dbuf.dptr = NULL;
 745
 746         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 747
 748         if (!dbuf.dptr)
 749                 return lhead;
 750
 751         locks = (struct posix_lock *)dbuf.dptr;
 752         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
 753
 754         /*
 755          * Check the current lock list on this dev/inode pair.
 756          * Quit if the list is deleted.
 757          */
 758
 759         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 760                 (double)lhead->start, (double)lhead->size ));
 761
 762         for (i=0; i<num_locks && lhead; i++) {
 763
 764                 struct posix_lock *lock = &locks[i];
 765                 struct lock_list *l_curr;
 766
 767                 /*
 768                  * Walk the lock list, checking for overlaps. Note that
 769                  * the lock list can expand within this loop if the current
 770                  * range being examined needs to be split.
 771                  */
 772
 773                 for (l_curr = lhead; l_curr;) {
 774
 775                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
 776                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 777
 778                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 779                                  (lock->start >= (l_curr->start + l_curr->size))) {
 780
 781                                 /* No overlap with this lock - leave this range alone. */
 782 /*********************************************
 783                                              +---------+
 784                                              | l_curr  |
 785                                              +---------+
 786                                 +-------+
 787                                 | lock  |
 788                                 +-------+
 789 OR....
 790              +---------+
 791              |  l_curr |
 792              +---------+
 793 **********************************************/
 794
 795                                 DEBUG(10,("no overlap case.\n" ));
 796
 797                                 l_curr = l_curr->next;
 798
 799                         } else if ( (l_curr->start >= lock->start) &&
 800                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 801
 802                                 /*
 803                                  * This unlock is completely overlapped by this existing lock range
 804                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 805                                  */
 806 /*********************************************
 807                 +---------+
 808                 |  l_curr |
 809                 +---------+
 810         +---------------------------+
 811         |       lock                |
 812         +---------------------------+
 813 **********************************************/
 814                                 /* Save the next pointer */
 815                                 struct lock_list *ul_next = l_curr->next;
 816
 817                                 DEBUG(10,("delete case.\n" ));
 818
 819                                 DLIST_REMOVE(lhead, l_curr);
 820                                 if(lhead == NULL)
 821                                         break; /* No more list... */
 822
 823                                 l_curr = ul_next;
 824
 825                         } else if ( (l_curr->start >= lock->start) &&
 826                                                 (l_curr->start < lock->start + lock->size) &&
 827                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 828
 829                                 /*
 830                                  * This unlock overlaps the existing lock range at the high end.
 831                                  * Truncate by moving start to existing range end and reducing size.
 832                                  */
 833 /*********************************************
 834                 +---------------+
 835                 |  l_curr       |
 836                 +---------------+
 837         +---------------+
 838         |    lock       |
 839         +---------------+
 840 BECOMES....
 841                         +-------+
 842                         | l_curr|
 843                         +-------+
 844 **********************************************/
 845
 846                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 847                                 l_curr->start = lock->start + lock->size;
 848
 849                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
 850                                                                 (double)l_curr->start, (double)l_curr->size ));
 851
 852                                 l_curr = l_curr->next;
 853
 854                         } else if ( (l_curr->start < lock->start) &&
 855                                                 (l_curr->start + l_curr->size > lock->start) &&
 856                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 857
 858                                 /*
 859                                  * This unlock overlaps the existing lock range at the low end.
 860                                  * Truncate by reducing size.
 861                                  */
 862 /*********************************************
 863    +---------------+
 864    |  l_curr       |
 865    +---------------+
 866            +---------------+
 867            |    lock       |
 868            +---------------+
 869 BECOMES....
 870    +-------+
 871    | l_curr|
 872    +-------+
 873 **********************************************/
 874
 875                                 l_curr->size = lock->start - l_curr->start;
 876
 877                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
 878                                                                 (double)l_curr->start, (double)l_curr->size ));
 879
 880                                 l_curr = l_curr->next;
 881
 882                         } else if ( (l_curr->start < lock->start) &&
 883                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 884                                 /*
 885                                  * Worst case scenario. Unlock request completely overlaps an existing
 886                                  * lock range. Split the request into two, push the new (upper) request
 887                                  * into the dlink list, and continue with the entry after ul_new (as we
 888                                  * know that ul_new will not overlap with this lock).
 889                                  */
 890 /*********************************************
 891         +---------------------------+
 892         |        l_curr             |
 893         +---------------------------+
 894                 +---------+
 895                 | lock    |
 896                 +---------+
 897 BECOMES.....
 898         +-------+         +---------+
 899         | l_curr|         | l_new   |
 900         +-------+         +---------+
 901 **********************************************/
 902                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
 903                                                                                                         sizeof(struct lock_list));
 904
 905                                 if(l_new == NULL) {
 906                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 907                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 908                                 }
 909
 910                                 ZERO_STRUCTP(l_new);
 911                                 l_new->start = lock->start + lock->size;
 912                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 913
 914                                 /* Truncate the l_curr. */
 915                                 l_curr->size = lock->start - l_curr->start;
 916
 917                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
 918 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 919                                                                 (double)l_new->start, (double)l_new->size ));
 920
 921                                 /*
 922                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 923                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
 924                                  */
 925
 926                                 l_new->prev = l_curr;
 927                                 l_new->next = l_curr->next;
 928                                 l_curr->next = l_new;
 929
 930                                 /* And move after the link we added. */
 931                                 l_curr = l_new->next;
 932
 933                         } else {
 934
 935                                 /*
 936                                  * This logic case should never happen. Ensure this is the
 937                                  * case by forcing an abort.... Remove in production.
 938                                  */
 939                                 pstring msg;
 940
 941                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 942 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
 943
 944                                 smb_panic(msg);
 945                         }
 946                 } /* end for ( l_curr = lhead; l_curr;) */
 947         } /* end for (i=0; i<num_locks && ul_head; i++) */
 948
 949         SAFE_FREE(dbuf.dptr);
 950
 951         return lhead;
 952 }
 953
 954 /****************************************************************************
 955  POSIX function to acquire a lock. Returns True if the
 956  lock could be granted, False if not.
 957 ****************************************************************************/
 958
 959 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 960 {
 961         SMB_OFF_T offset;
 962         SMB_OFF_T count;
 963         BOOL ret = True;
 964         size_t entry_num = 0;
 965         size_t lock_count;
 966         TALLOC_CTX *l_ctx = NULL;
 967         struct lock_list *llist = NULL;
 968         struct lock_list *ll = NULL;
 969         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 970
 971         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 972                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 973
 974         /*
 975          * If the requested lock won't fit in the POSIX range, we will
 976          * pretend it was successful.
 977          */
 978
 979         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 980                 return True;
 981
 982         /*
 983          * Windows is very strange. It allows read locks to be overlayed
 984          * (even over a write lock), but leaves the write lock in force until the first
 985          * unlock. It also reference counts the locks. This means the following sequence :
 986          *
 987          * process1                                      process2
 988          * ------------------------------------------------------------------------
 989          * WRITE LOCK : start = 2, len = 10
 990          *                                            READ LOCK: start =0, len = 10 - FAIL
 991          * READ LOCK : start = 0, len = 14
 992          *                                            READ LOCK: start =0, len = 10 - FAIL
 993          * UNLOCK : start = 2, len = 10
 994          *                                            READ LOCK: start =0, len = 10 - OK
 995          *
 996          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 997          * would leave a single read lock over the 0-14 region. In order to
 998          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
 999          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1000          * semantics that if a write lock is added, then it will be first in the array.
1001          */
1002
1003         if ((l_ctx = talloc_init()) == NULL) {
1004                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1005                 return True; /* Not a fatal error. */
1006         }
1007
1008         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1009                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1010                 talloc_destroy(l_ctx);
1011                 return True; /* Not a fatal error. */
1012         }
1013
1014         /*
1015          * Create the initial list entry containing the
1016          * lock we want to add.
1017          */
1018
1019         ZERO_STRUCTP(ll);
1020         ll->start = offset;
1021         ll->size = count;
1022
1023         DLIST_ADD(llist, ll);
1024
1025         /*
1026          * The following call calculates if there are any
1027          * overlapping locks held by this process on
1028          * fd's open on the same file and splits this list
1029          * into a list of lock ranges that do not overlap with existing
1030          * POSIX locks.
1031          */
1032
1033         llist = posix_lock_list(l_ctx, llist, fsp);
1034
1035         /*
1036          * Now we have the list of ranges to lock it is safe to add the
1037          * entry into the POSIX lock tdb. We take note of the entry we
1038          * added here in case we have to remove it on POSIX lock fail.
1039          */
1040
1041         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1042                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1043                 talloc_destroy(l_ctx);
1044                 return False;
1045         }
1046
1047         /*
1048          * Add the POSIX locks on the list of ranges returned.
1049          * As the lock is supposed to be added atomically, we need to
1050          * back out all the locks if any one of these calls fail.
1051          */
1052
1053         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1054                 offset = ll->start;
1055                 count = ll->size;
1056
1057                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1058                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1059
1060                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1061                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1062                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1063                         ret = False;
1064                         break;
1065                 }
1066         }
1067
1068         if (!ret) {
1069
1070                 /*
1071                  * Back out all the POSIX locks we have on fail.
1072                  */
1073
1074                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1075                         offset = ll->start;
1076                         count = ll->size;
1077
1078                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1079                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1080
1081                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1082                 }
1083
1084                 /*
1085                  * Remove the tdb entry for this lock.
1086                  */
1087
1088                 delete_posix_lock_entry_by_index(fsp,entry_num);
1089         }
1090
1091         talloc_destroy(l_ctx);
1092         return ret;
1093 }
1094
1095 /****************************************************************************
1096  POSIX function to release a lock. Returns True if the
1097  lock could be released, False if not.
1098 ****************************************************************************/
1099
1100 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1101 {
1102         SMB_OFF_T offset;
1103         SMB_OFF_T count;
1104         BOOL ret = True;
1105         TALLOC_CTX *ul_ctx = NULL;
1106         struct lock_list *ulist = NULL;
1107         struct lock_list *ul = NULL;
1108         struct posix_lock deleted_lock;
1109         int num_overlapped_entries;
1110
1111         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1112                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1113
1114         /*
1115          * If the requested lock won't fit in the POSIX range, we will
1116          * pretend it was successful.
1117          */
1118
1119         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1120                 return True;
1121
1122         /*
1123          * We treat this as one unlock request for POSIX accounting purposes even
1124          * if it may later be split into multiple smaller POSIX unlock ranges.
1125          * num_overlapped_entries is the number of existing locks that have any
1126          * overlap with this unlock request.
1127          */
1128
1129         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1130
1131         if (num_overlapped_entries == -1) {
1132         smb_panic("release_posix_lock: unable find entry to delete !\n");
1133         }
1134
1135         /*
1136          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1137          * a POSIX write lock, then before doing the unlock we need to downgrade
1138          * the POSIX lock to a read lock. This allows any overlapping read locks
1139          * to be atomically maintained.
1140          */
1141
1142         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1143                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1144                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1145                         return False;
1146                 }
1147         }
1148
1149         if ((ul_ctx = talloc_init()) == NULL) {
1150                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1151                 return True; /* Not a fatal error. */
1152         }
1153
1154         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1155                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1156                 talloc_destroy(ul_ctx);
1157                 return True; /* Not a fatal error. */
1158         }
1159
1160         /*
1161          * Create the initial list entry containing the
1162          * lock we want to remove.
1163          */
1164
1165         ZERO_STRUCTP(ul);
1166         ul->start = offset;
1167         ul->size = count;
1168
1169         DLIST_ADD(ulist, ul);
1170
1171         /*
1172          * The following call calculates if there are any
1173          * overlapping locks held by this process on
1174          * fd's open on the same file and creates a
1175          * list of unlock ranges that will allow
1176          * POSIX lock ranges to remain on the file whilst the
1177          * unlocks are performed.
1178          */
1179
1180         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1181
1182         /*
1183          * Release the POSIX locks on the list of ranges returned.
1184          */
1185
1186         for(; ulist; ulist = ulist->next) {
1187                 offset = ulist->start;
1188                 count = ulist->size;
1189
1190                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1191                         (double)offset, (double)count ));
1192
1193                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1194                         ret = False;
1195         }
1196
1197         talloc_destroy(ul_ctx);
1198
1199         return ret;
1200 }
1201
1202 /****************************************************************************
1203  Remove all lock entries for a specific dev/inode pair from the tdb.
1204 ****************************************************************************/
1205
1206 static void delete_posix_lock_entries(files_struct *fsp)
1207 {
1208         TDB_DATA kbuf = locking_key_fsp(fsp);
1209
1210         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1211                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1212 }
1213
1214 /****************************************************************************
1215  Debug function.
1216 ****************************************************************************/
1217
1218 static void dump_entry(struct posix_lock *pl)
1219 {
1220         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1221                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1222 }
1223
1224 /****************************************************************************
1225  Remove any locks on this fd. Called from file_close().
1226 ****************************************************************************/
1227
1228 void posix_locking_close_file(files_struct *fsp)
1229 {
1230         struct posix_lock *entries = NULL;
1231         size_t count, i;
1232
1233         /*
1234          * Optimization for the common case where we are the only
1235          * opener of a file. If all fd entries are our own, we don't
1236          * need to explicitly release all the locks via the POSIX functions,
1237          * we can just remove all the entries in the tdb and allow the
1238          * close to remove the real locks.
1239          */
1240
1241         count = get_posix_lock_entries(fsp, &entries);
1242
1243         if (count == 0) {
1244                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1245                 return;
1246         }
1247
1248         for (i = 0; i < count; i++) {
1249                 if (entries[i].fd != fsp->fd )
1250                         break;
1251
1252                 dump_entry(&entries[i]);
1253         }
1254
1255         if (i == count) {
1256                 /* All locks are ours. */
1257                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1258                         fsp->fsp_name, (unsigned int)count ));
1259                 SAFE_FREE(entries);
1260                 delete_posix_lock_entries(fsp);
1261                 return;
1262         }
1263
1264         /*
1265          * Difficult case. We need to delete all our locks, whilst leaving
1266          * all other POSIX locks in place.
1267          */
1268
1269         for (i = 0; i < count; i++) {
1270                 struct posix_lock *pl = &entries[i];
1271                 if (pl->fd == fsp->fd)
1272                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1273         }
1274         SAFE_FREE(entries);
1275 }
1276
1277 /*******************************************************************
1278  Create the in-memory POSIX lock databases.
1279 ********************************************************************/
1280
1281 BOOL posix_locking_init(int read_only)
1282 {
1283         if (posix_lock_tdb && posix_pending_close_tdb)
1284                 return True;
1285
1286         if (!posix_lock_tdb)
1287                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1288                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1289         if (!posix_lock_tdb) {
1290                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1291                 return False;
1292         }
1293         if (!posix_pending_close_tdb)
1294                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1295                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1296         if (!posix_pending_close_tdb) {
1297                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1298                 return False;
1299         }
1300
1301         return True;
1302 }
1303
1304 /*******************************************************************
1305  Delete the in-memory POSIX lock databases.
1306 ********************************************************************/
1307
1308 BOOL posix_locking_end(void)
1309 {
1310     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1311                 return False;
1312     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1313                 return False;
1314         return True;
1315 }