source3/locking/posix.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3    Locking functions
   4    Copyright (C) Jeremy Allison 1992-2000
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19
  20    Revision History:
  21
  22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  23 */
  24
  25 #include "includes.h"
  26
  27 /*
  28  * The POSIX locking database handle.
  29  */
  30
  31 static TDB_CONTEXT *posix_lock_tdb;
  32
  33 /*
  34  * The pending close database handle.
  35  */
  36
  37 static TDB_CONTEXT *posix_pending_close_tdb;
  38
  39 /*
  40  * The data in POSIX lock records is an unsorted linear array of these
  41  * records.  It is unnecessary to store the count as tdb provides the
  42  * size of the record.
  43  */
  44
  45 struct posix_lock {
  46         int fd;
  47         SMB_OFF_T start;
  48         SMB_OFF_T size;
  49         int lock_type;
  50 };
  51
  52 /*
  53  * The data in POSIX pending close records is an unsorted linear array of int
  54  * records.  It is unnecessary to store the count as tdb provides the
  55  * size of the record.
  56  */
  57
  58 /* The key used in both the POSIX databases. */
  59
  60 struct posix_lock_key {
  61         SMB_DEV_T device;
  62         SMB_INO_T inode;
  63 };
  64
  65 /*******************************************************************
  66  Form a static locking key for a dev/inode pair.
  67 ******************************************************************/
  68
  69 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  70 {
  71         static struct posix_lock_key key;
  72         TDB_DATA kbuf;
  73
  74         memset(&key, '\0', sizeof(key));
  75         key.device = dev;
  76         key.inode = inode;
  77         kbuf.dptr = (char *)&key;
  78         kbuf.dsize = sizeof(key);
  79         return kbuf;
  80 }
  81
  82 /*******************************************************************
  83  Convenience function to get a key from an fsp.
  84 ******************************************************************/
  85
  86 static TDB_DATA locking_key_fsp(files_struct *fsp)
  87 {
  88         return locking_key(fsp->dev, fsp->inode);
  89 }
  90
  91 /****************************************************************************
  92  Add an fd to the pending close tdb.
  93 ****************************************************************************/
  94
  95 static BOOL add_fd_to_close_entry(files_struct *fsp)
  96 {
  97         TDB_DATA kbuf = locking_key_fsp(fsp);
  98         TDB_DATA dbuf;
  99         char *tp;
 100
 101         dbuf.dptr = NULL;
 102
 103         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 104
 105         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
 106         if (!tp) {
 107                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 108                 SAFE_FREE(dbuf.dptr);
 109                 return False;
 110         } else
 111                 dbuf.dptr = tp;
 112
 113         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
 114         dbuf.dsize += sizeof(int);
 115
 116         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 117                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 118         }
 119
 120         SAFE_FREE(dbuf.dptr);
 121         return True;
 122 }
 123
 124 /****************************************************************************
 125  Remove all fd entries for a specific dev/inode pair from the tdb.
 126 ****************************************************************************/
 127
 128 static void delete_close_entries(files_struct *fsp)
 129 {
 130         TDB_DATA kbuf = locking_key_fsp(fsp);
 131
 132         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 133                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 134 }
 135
 136 /****************************************************************************
 137  Get the array of POSIX pending close records for an open fsp. Caller must
 138  free. Returns number of entries.
 139 ****************************************************************************/
 140
 141 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 142 {
 143         TDB_DATA kbuf = locking_key_fsp(fsp);
 144         TDB_DATA dbuf;
 145         size_t count = 0;
 146
 147         *entries = NULL;
 148         dbuf.dptr = NULL;
 149
 150         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 151
 152         if (!dbuf.dptr) {
 153                 return 0;
 154         }
 155
 156         *entries = (int *)dbuf.dptr;
 157         count = (size_t)(dbuf.dsize / sizeof(int));
 158
 159         return count;
 160 }
 161
 162 /****************************************************************************
 163  Get the array of POSIX locks for an fsp. Caller must free. Returns
 164  number of entries.
 165 ****************************************************************************/
 166
 167 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 168 {
 169         TDB_DATA kbuf = locking_key_fsp(fsp);
 170         TDB_DATA dbuf;
 171         size_t count = 0;
 172
 173         *entries = NULL;
 174
 175         dbuf.dptr = NULL;
 176
 177         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 178
 179         if (!dbuf.dptr) {
 180                 return 0;
 181         }
 182
 183         *entries = (struct posix_lock *)dbuf.dptr;
 184         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 185
 186         return count;
 187 }
 188
 189 /****************************************************************************
 190  Deal with pending closes needed by POSIX locking support.
 191  Note that posix_locking_close_file() is expected to have been called
 192  to delete all locks on this fsp before this function is called.
 193 ****************************************************************************/
 194
 195 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 196 {
 197         int saved_errno = 0;
 198         int ret;
 199         size_t count, i;
 200         struct posix_lock *entries = NULL;
 201         int *fd_array = NULL;
 202         BOOL locks_on_other_fds = False;
 203
 204         if (!lp_posix_locking(SNUM(conn))) {
 205                 /*
 206                  * No POSIX to worry about, just close.
 207                  */
 208                 ret = conn->vfs_ops.close(fsp,fsp->fd);
 209                 fsp->fd = -1;
 210                 return ret;
 211         }
 212
 213         /*
 214          * Get the number of outstanding POSIX locks on this dev/inode pair.
 215          */
 216
 217         count = get_posix_lock_entries(fsp, &entries);
 218
 219         /*
 220          * Check if there are any outstanding locks belonging to
 221          * other fd's. This should never be the case if posix_locking_close_file()
 222          * has been called first, but it never hurts to be *sure*.
 223          */
 224
 225         for (i = 0; i < count; i++) {
 226                 if (entries[i].fd != fsp->fd) {
 227                         locks_on_other_fds = True;
 228                         break;
 229                 }
 230         }
 231
 232         if (locks_on_other_fds) {
 233
 234                 /*
 235                  * There are outstanding locks on this dev/inode pair on other fds.
 236                  * Add our fd to the pending close tdb and set fsp->fd to -1.
 237                  */
 238
 239                 if (!add_fd_to_close_entry(fsp)) {
 240                         SAFE_FREE(entries);
 241                         return False;
 242                 }
 243
 244                 SAFE_FREE(entries);
 245                 fsp->fd = -1;
 246                 return 0;
 247         }
 248
 249         SAFE_FREE(entries);
 250
 251         /*
 252          * No outstanding POSIX locks. Get the pending close fd's
 253          * from the tdb and close them all.
 254          */
 255
 256         count = get_posix_pending_close_entries(fsp, &fd_array);
 257
 258         if (count) {
 259                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 260
 261                 for(i = 0; i < count; i++) {
 262                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
 263                                 saved_errno = errno;
 264                         }
 265                 }
 266
 267                 /*
 268                  * Delete all fd's stored in the tdb
 269                  * for this dev/inode pair.
 270                  */
 271
 272                 delete_close_entries(fsp);
 273         }
 274
 275         SAFE_FREE(fd_array);
 276
 277         /*
 278          * Finally close the fd associated with this fsp.
 279          */
 280
 281         ret = conn->vfs_ops.close(fsp,fsp->fd);
 282
 283         if (saved_errno != 0) {
 284         errno = saved_errno;
 285                 ret = -1;
 286     }
 287
 288         fsp->fd = -1;
 289
 290         return ret;
 291 }
 292
 293 /****************************************************************************
 294  Debugging aid :-).
 295 ****************************************************************************/
 296
 297 static const char *posix_lock_type_name(int lock_type)
 298 {
 299         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
 300 }
 301
 302 /****************************************************************************
 303  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 304  then the POSIX fcntl lock fails.
 305 ****************************************************************************/
 306
 307 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
 308 {
 309         TDB_DATA kbuf = locking_key_fsp(fsp);
 310         TDB_DATA dbuf;
 311         struct posix_lock *locks;
 312         size_t count;
 313
 314         dbuf.dptr = NULL;
 315
 316         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 317
 318         if (!dbuf.dptr) {
 319                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
 320                 goto fail;
 321         }
 322
 323         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 324         locks = (struct posix_lock *)dbuf.dptr;
 325
 326         if (count == 1) {
 327                 tdb_delete(posix_lock_tdb, kbuf);
 328         } else {
 329                 if (entry < count-1) {
 330                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
 331                 }
 332                 dbuf.dsize -= sizeof(*locks);
 333                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 334         }
 335
 336         SAFE_FREE(dbuf.dptr);
 337
 338         return True;
 339
 340  fail:
 341
 342         SAFE_FREE(dbuf.dptr);
 343         return False;
 344 }
 345
 346 /****************************************************************************
 347  Add an entry into the POSIX locking tdb. We return the index number of the
 348  added lock (used in case we need to delete *exactly* this entry). Returns
 349  False on fail, True on success.
 350 ****************************************************************************/
 351
 352 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
 353 {
 354         TDB_DATA kbuf = locking_key_fsp(fsp);
 355         TDB_DATA dbuf;
 356         struct posix_lock pl;
 357         char *tp;
 358
 359         dbuf.dptr = NULL;
 360
 361         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 362
 363         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
 364
 365         /*
 366          * Add new record.
 367          */
 368
 369         pl.fd = fsp->fd;
 370         pl.start = start;
 371         pl.size = size;
 372         pl.lock_type = lock_type;
 373
 374         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
 375         if (!tp) {
 376                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 377                 goto fail;
 378         } else
 379                 dbuf.dptr = tp;
 380
 381         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
 382         dbuf.dsize += sizeof(pl);
 383
 384         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 385                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 386                 goto fail;
 387         }
 388
 389         SAFE_FREE(dbuf.dptr);
 390
 391         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
 392                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
 393                         (double)fsp->dev, (double)fsp->inode ));
 394
 395         return True;
 396
 397  fail:
 398
 399         SAFE_FREE(dbuf.dptr);
 400         return False;
 401 }
 402
 403 /****************************************************************************
 404  Calculate if locks have any overlap at all.
 405 ****************************************************************************/
 406
 407 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
 408 {
 409         if (start1 >= start2 && start1 <= start2 + size2)
 410                 return True;
 411
 412         if (start1 < start2 && start1 + size1 > start2)
 413                 return True;
 414
 415         return False;
 416 }
 417
 418 /****************************************************************************
 419  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 420  deleted and the number of records that are overlapped by this one, or -1 on error.
 421 ****************************************************************************/
 422
 423 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 424 {
 425         TDB_DATA kbuf = locking_key_fsp(fsp);
 426         TDB_DATA dbuf;
 427         struct posix_lock *locks;
 428         size_t i, count;
 429         BOOL found = False;
 430         int num_overlapping_records = 0;
 431
 432         dbuf.dptr = NULL;
 433
 434         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 435
 436         if (!dbuf.dptr) {
 437                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 438                 goto fail;
 439         }
 440
 441         /* There are existing locks - find a match. */
 442         locks = (struct posix_lock *)dbuf.dptr;
 443         count = (size_t)(dbuf.dsize / sizeof(*locks));
 444
 445         /*
 446          * Search for and delete the first record that matches the
 447          * unlock criteria.
 448          */
 449
 450         for (i=0; i<count; i++) {
 451                 struct posix_lock *entry = &locks[i];
 452
 453                 if (entry->fd == fsp->fd &&
 454                         entry->start == start &&
 455                         entry->size == size) {
 456
 457                         /* Make a copy if requested. */
 458                         if (pl)
 459                                 *pl = *entry;
 460
 461                         /* Found it - delete it. */
 462                         if (count == 1) {
 463                                 tdb_delete(posix_lock_tdb, kbuf);
 464                         } else {
 465                                 if (i < count-1) {
 466                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
 467                                 }
 468                                 dbuf.dsize -= sizeof(*locks);
 469                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 470                         }
 471                         count--;
 472                         found = True;
 473                         break;
 474                 }
 475         }
 476
 477         if (!found)
 478                 goto fail;
 479
 480         /*
 481          * Count the number of entries that are
 482          * overlapped by this unlock request.
 483          */
 484
 485         for (i = 0; i < count; i++) {
 486                 struct posix_lock *entry = &locks[i];
 487
 488                 if (fsp->fd == entry->fd &&
 489                         does_lock_overlap( start, size, entry->start, entry->size))
 490                                 num_overlapping_records++;
 491         }
 492
 493         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 494                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
 495                                 (unsigned int)num_overlapping_records ));
 496
 497         SAFE_FREE(dbuf.dptr);
 498
 499         return num_overlapping_records;
 500
 501  fail:
 502
 503         SAFE_FREE(dbuf.dptr);
 504         return -1;
 505 }
 506
 507 /****************************************************************************
 508  Utility function to map a lock type correctly depending on the open
 509  mode of a file.
 510 ****************************************************************************/
 511
 512 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 513 {
 514         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 515                 /*
 516                  * Many UNIX's cannot get a write lock on a file opened read-only.
 517                  * Win32 locking semantics allow this.
 518                  * Do the best we can and attempt a read-only lock.
 519                  */
 520                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 521                 return F_RDLCK;
 522         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
 523                 /*
 524                  * Ditto for read locks on write only files.
 525                  */
 526                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 527                 return F_WRLCK;
 528         }
 529
 530   /*
 531    * This return should be the most normal, as we attempt
 532    * to always open files read/write.
 533    */
 534
 535   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 536 }
 537
 538 /****************************************************************************
 539  Check to see if the given unsigned lock range is within the possible POSIX
 540  range. Modifies the given args to be in range if possible, just returns
 541  False if not.
 542 ****************************************************************************/
 543
 544 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 545                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 546 {
 547         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
 548         SMB_OFF_T count = (SMB_OFF_T)u_count;
 549
 550         /*
 551          * For the type of system we are, attempt to
 552          * find the maximum positive lock offset as an SMB_OFF_T.
 553          */
 554
 555 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
 556
 557         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
 558
 559 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 560
 561         /*
 562          * In this case SMB_OFF_T is 64 bits,
 563          * and the underlying system can handle 64 bit signed locks.
 564          */
 565
 566     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 567     SMB_OFF_T mask = (mask2<<1);
 568     SMB_OFF_T max_positive_lock_offset = ~mask;
 569
 570 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 571
 572         /*
 573          * In this case either SMB_OFF_T is 32 bits,
 574          * or the underlying system cannot handle 64 bit signed locks.
 575          * All offsets & counts must be 2^31 or less.
 576          */
 577
 578     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
 579
 580 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 581
 582         /*
 583          * POSIX locks of length zero mean lock to end-of-file.
 584          * Win32 locks of length zero are point probes. Ignore
 585          * any Win32 locks of length zero. JRA.
 586          */
 587
 588         if (count == (SMB_OFF_T)0) {
 589                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 590                 return False;
 591         }
 592
 593         /*
 594          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 595          * ignore this lock.
 596          */
 597
 598         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
 599                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 600                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
 601                 return False;
 602         }
 603
 604         /*
 605          * We must truncate the count to less than max_positive_lock_offset.
 606          */
 607
 608         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
 609                 count = max_positive_lock_offset;
 610
 611         /*
 612          * Truncate count to end at max lock offset.
 613          */
 614
 615         if (offset + count < 0 || offset + count > max_positive_lock_offset)
 616                 count = max_positive_lock_offset - offset;
 617
 618         /*
 619          * If we ate all the count, ignore this lock.
 620          */
 621
 622         if (count == 0) {
 623                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 624                                 (double)u_offset, (double)u_count ));
 625                 return False;
 626         }
 627
 628         /*
 629          * The mapping was successful.
 630          */
 631
 632         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 633                         (double)offset, (double)count ));
 634
 635         *offset_out = offset;
 636         *count_out = count;
 637
 638         return True;
 639 }
 640
 641 /****************************************************************************
 642  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 643  broken NFS implementations.
 644 ****************************************************************************/
 645
 646 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 647 {
 648         int ret;
 649         struct connection_struct *conn = fsp->conn;
 650
 651         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
 652
 653         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 654
 655         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 656
 657                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 658                                         (double)offset,(double)count));
 659                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 660                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
 661
 662                 /*
 663                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 664                  * 32 bit NFS mounted filesystems. Just ignore it.
 665                  */
 666
 667                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
 668                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 669                         return True;
 670                 }
 671
 672                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
 673                         /* 32 bit NFS file system, retry with smaller offset */
 674                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 675                         errno = 0;
 676                         count &= 0x7fffffff;
 677                         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 678                 }
 679         }
 680
 681         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 682
 683         return ret;
 684 }
 685
 686 /****************************************************************************
 687  POSIX function to see if a file region is locked. Returns True if the
 688  region is locked, False otherwise.
 689 ****************************************************************************/
 690
 691 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 692 {
 693         SMB_OFF_T offset;
 694         SMB_OFF_T count;
 695         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 696
 697         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 698                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 699
 700         /*
 701          * If the requested lock won't fit in the POSIX range, we will
 702          * never set it, so presume it is not locked.
 703          */
 704
 705         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 706                 return False;
 707
 708         /*
 709          * Note that most UNIX's can *test* for a write lock on
 710          * a read-only fd, just not *set* a write lock on a read-only
 711          * fd. So we don't need to use map_lock_type here.
 712          */
 713
 714         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 715 }
 716
 717 /*
 718  * Structure used when splitting a lock range
 719  * into a POSIX lock range. Doubly linked list.
 720  */
 721
 722 struct lock_list {
 723     struct lock_list *next;
 724     struct lock_list *prev;
 725     SMB_OFF_T start;
 726     SMB_OFF_T size;
 727 };
 728
 729 /****************************************************************************
 730  Create a list of lock ranges that don't overlap a given range. Used in calculating
 731  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 732  understand it :-).
 733 ****************************************************************************/
 734
 735 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
 736 {
 737         TDB_DATA kbuf = locking_key_fsp(fsp);
 738         TDB_DATA dbuf;
 739         struct posix_lock *locks;
 740         size_t num_locks, i;
 741
 742         dbuf.dptr = NULL;
 743
 744         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 745
 746         if (!dbuf.dptr)
 747                 return lhead;
 748
 749         locks = (struct posix_lock *)dbuf.dptr;
 750         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
 751
 752         /*
 753          * Check the current lock list on this dev/inode pair.
 754          * Quit if the list is deleted.
 755          */
 756
 757         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 758                 (double)lhead->start, (double)lhead->size ));
 759
 760         for (i=0; i<num_locks && lhead; i++) {
 761
 762                 struct posix_lock *lock = &locks[i];
 763                 struct lock_list *l_curr;
 764
 765                 /*
 766                  * Walk the lock list, checking for overlaps. Note that
 767                  * the lock list can expand within this loop if the current
 768                  * range being examined needs to be split.
 769                  */
 770
 771                 for (l_curr = lhead; l_curr;) {
 772
 773                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
 774                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 775
 776                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 777                                  (lock->start >= (l_curr->start + l_curr->size))) {
 778
 779                                 /* No overlap with this lock - leave this range alone. */
 780 /*********************************************
 781                                              +---------+
 782                                              | l_curr  |
 783                                              +---------+
 784                                 +-------+
 785                                 | lock  |
 786                                 +-------+
 787 OR....
 788              +---------+
 789              |  l_curr |
 790              +---------+
 791 **********************************************/
 792
 793                                 DEBUG(10,("no overlap case.\n" ));
 794
 795                                 l_curr = l_curr->next;
 796
 797                         } else if ( (l_curr->start >= lock->start) &&
 798                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 799
 800                                 /*
 801                                  * This unlock is completely overlapped by this existing lock range
 802                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 803                                  */
 804 /*********************************************
 805                 +---------+
 806                 |  l_curr |
 807                 +---------+
 808         +---------------------------+
 809         |       lock                |
 810         +---------------------------+
 811 **********************************************/
 812                                 /* Save the next pointer */
 813                                 struct lock_list *ul_next = l_curr->next;
 814
 815                                 DEBUG(10,("delete case.\n" ));
 816
 817                                 DLIST_REMOVE(lhead, l_curr);
 818                                 if(lhead == NULL)
 819                                         break; /* No more list... */
 820
 821                                 l_curr = ul_next;
 822
 823                         } else if ( (l_curr->start >= lock->start) &&
 824                                                 (l_curr->start < lock->start + lock->size) &&
 825                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 826
 827                                 /*
 828                                  * This unlock overlaps the existing lock range at the high end.
 829                                  * Truncate by moving start to existing range end and reducing size.
 830                                  */
 831 /*********************************************
 832                 +---------------+
 833                 |  l_curr       |
 834                 +---------------+
 835         +---------------+
 836         |    lock       |
 837         +---------------+
 838 BECOMES....
 839                         +-------+
 840                         | l_curr|
 841                         +-------+
 842 **********************************************/
 843
 844                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 845                                 l_curr->start = lock->start + lock->size;
 846
 847                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
 848                                                                 (double)l_curr->start, (double)l_curr->size ));
 849
 850                                 l_curr = l_curr->next;
 851
 852                         } else if ( (l_curr->start < lock->start) &&
 853                                                 (l_curr->start + l_curr->size > lock->start) &&
 854                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 855
 856                                 /*
 857                                  * This unlock overlaps the existing lock range at the low end.
 858                                  * Truncate by reducing size.
 859                                  */
 860 /*********************************************
 861    +---------------+
 862    |  l_curr       |
 863    +---------------+
 864            +---------------+
 865            |    lock       |
 866            +---------------+
 867 BECOMES....
 868    +-------+
 869    | l_curr|
 870    +-------+
 871 **********************************************/
 872
 873                                 l_curr->size = lock->start - l_curr->start;
 874
 875                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
 876                                                                 (double)l_curr->start, (double)l_curr->size ));
 877
 878                                 l_curr = l_curr->next;
 879
 880                         } else if ( (l_curr->start < lock->start) &&
 881                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 882                                 /*
 883                                  * Worst case scenario. Unlock request completely overlaps an existing
 884                                  * lock range. Split the request into two, push the new (upper) request
 885                                  * into the dlink list, and continue with the entry after ul_new (as we
 886                                  * know that ul_new will not overlap with this lock).
 887                                  */
 888 /*********************************************
 889         +---------------------------+
 890         |        l_curr             |
 891         +---------------------------+
 892                 +---------+
 893                 | lock    |
 894                 +---------+
 895 BECOMES.....
 896         +-------+         +---------+
 897         | l_curr|         | l_new   |
 898         +-------+         +---------+
 899 **********************************************/
 900                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
 901                                                                                                         sizeof(struct lock_list));
 902
 903                                 if(l_new == NULL) {
 904                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 905                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 906                                 }
 907
 908                                 ZERO_STRUCTP(l_new);
 909                                 l_new->start = lock->start + lock->size;
 910                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 911
 912                                 /* Truncate the l_curr. */
 913                                 l_curr->size = lock->start - l_curr->start;
 914
 915                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
 916 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 917                                                                 (double)l_new->start, (double)l_new->size ));
 918
 919                                 /*
 920                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 921                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
 922                                  */
 923
 924                                 l_new->prev = l_curr;
 925                                 l_new->next = l_curr->next;
 926                                 l_curr->next = l_new;
 927
 928                                 /* And move after the link we added. */
 929                                 l_curr = l_new->next;
 930
 931                         } else {
 932
 933                                 /*
 934                                  * This logic case should never happen. Ensure this is the
 935                                  * case by forcing an abort.... Remove in production.
 936                                  */
 937                                 pstring msg;
 938
 939                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 940 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
 941
 942                                 smb_panic(msg);
 943                         }
 944                 } /* end for ( l_curr = lhead; l_curr;) */
 945         } /* end for (i=0; i<num_locks && ul_head; i++) */
 946
 947         SAFE_FREE(dbuf.dptr);
 948
 949         return lhead;
 950 }
 951
 952 /****************************************************************************
 953  POSIX function to acquire a lock. Returns True if the
 954  lock could be granted, False if not.
 955 ****************************************************************************/
 956
 957 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 958 {
 959         SMB_OFF_T offset;
 960         SMB_OFF_T count;
 961         BOOL ret = True;
 962         size_t entry_num = 0;
 963         size_t lock_count;
 964         TALLOC_CTX *l_ctx = NULL;
 965         struct lock_list *llist = NULL;
 966         struct lock_list *ll = NULL;
 967         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 968
 969         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 970                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 971
 972         /*
 973          * If the requested lock won't fit in the POSIX range, we will
 974          * pretend it was successful.
 975          */
 976
 977         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 978                 return True;
 979
 980         /*
 981          * Windows is very strange. It allows read locks to be overlayed
 982          * (even over a write lock), but leaves the write lock in force until the first
 983          * unlock. It also reference counts the locks. This means the following sequence :
 984          *
 985          * process1                                      process2
 986          * ------------------------------------------------------------------------
 987          * WRITE LOCK : start = 2, len = 10
 988          *                                            READ LOCK: start =0, len = 10 - FAIL
 989          * READ LOCK : start = 0, len = 14
 990          *                                            READ LOCK: start =0, len = 10 - FAIL
 991          * UNLOCK : start = 2, len = 10
 992          *                                            READ LOCK: start =0, len = 10 - OK
 993          *
 994          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 995          * would leave a single read lock over the 0-14 region. In order to
 996          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
 997          * entries, one for each overlayed lock request. We are guarenteed by the brlock
 998          * semantics that if a write lock is added, then it will be first in the array.
 999          */
1000
1001         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1002                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1003                 return True; /* Not a fatal error. */
1004         }
1005
1006         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1007                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1008                 talloc_destroy(l_ctx);
1009                 return True; /* Not a fatal error. */
1010         }
1011
1012         /*
1013          * Create the initial list entry containing the
1014          * lock we want to add.
1015          */
1016
1017         ZERO_STRUCTP(ll);
1018         ll->start = offset;
1019         ll->size = count;
1020
1021         DLIST_ADD(llist, ll);
1022
1023         /*
1024          * The following call calculates if there are any
1025          * overlapping locks held by this process on
1026          * fd's open on the same file and splits this list
1027          * into a list of lock ranges that do not overlap with existing
1028          * POSIX locks.
1029          */
1030
1031         llist = posix_lock_list(l_ctx, llist, fsp);
1032
1033         /*
1034          * Now we have the list of ranges to lock it is safe to add the
1035          * entry into the POSIX lock tdb. We take note of the entry we
1036          * added here in case we have to remove it on POSIX lock fail.
1037          */
1038
1039         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1040                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1041                 talloc_destroy(l_ctx);
1042                 return False;
1043         }
1044
1045         /*
1046          * Add the POSIX locks on the list of ranges returned.
1047          * As the lock is supposed to be added atomically, we need to
1048          * back out all the locks if any one of these calls fail.
1049          */
1050
1051         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1052                 offset = ll->start;
1053                 count = ll->size;
1054
1055                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1056                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1057
1058                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1059                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1060                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1061                         ret = False;
1062                         break;
1063                 }
1064         }
1065
1066         if (!ret) {
1067
1068                 /*
1069                  * Back out all the POSIX locks we have on fail.
1070                  */
1071
1072                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1073                         offset = ll->start;
1074                         count = ll->size;
1075
1076                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1077                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1078
1079                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1080                 }
1081
1082                 /*
1083                  * Remove the tdb entry for this lock.
1084                  */
1085
1086                 delete_posix_lock_entry_by_index(fsp,entry_num);
1087         }
1088
1089         talloc_destroy(l_ctx);
1090         return ret;
1091 }
1092
1093 /****************************************************************************
1094  POSIX function to release a lock. Returns True if the
1095  lock could be released, False if not.
1096 ****************************************************************************/
1097
1098 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1099 {
1100         SMB_OFF_T offset;
1101         SMB_OFF_T count;
1102         BOOL ret = True;
1103         TALLOC_CTX *ul_ctx = NULL;
1104         struct lock_list *ulist = NULL;
1105         struct lock_list *ul = NULL;
1106         struct posix_lock deleted_lock;
1107         int num_overlapped_entries;
1108
1109         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1110                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1111
1112         /*
1113          * If the requested lock won't fit in the POSIX range, we will
1114          * pretend it was successful.
1115          */
1116
1117         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1118                 return True;
1119
1120         /*
1121          * We treat this as one unlock request for POSIX accounting purposes even
1122          * if it may later be split into multiple smaller POSIX unlock ranges.
1123          * num_overlapped_entries is the number of existing locks that have any
1124          * overlap with this unlock request.
1125          */
1126
1127         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1128
1129         if (num_overlapped_entries == -1) {
1130         smb_panic("release_posix_lock: unable find entry to delete !\n");
1131         }
1132
1133         /*
1134          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1135          * a POSIX write lock, then before doing the unlock we need to downgrade
1136          * the POSIX lock to a read lock. This allows any overlapping read locks
1137          * to be atomically maintained.
1138          */
1139
1140         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1141                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1142                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1143                         return False;
1144                 }
1145         }
1146
1147         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1148                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1149                 return True; /* Not a fatal error. */
1150         }
1151
1152         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1153                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1154                 talloc_destroy(ul_ctx);
1155                 return True; /* Not a fatal error. */
1156         }
1157
1158         /*
1159          * Create the initial list entry containing the
1160          * lock we want to remove.
1161          */
1162
1163         ZERO_STRUCTP(ul);
1164         ul->start = offset;
1165         ul->size = count;
1166
1167         DLIST_ADD(ulist, ul);
1168
1169         /*
1170          * The following call calculates if there are any
1171          * overlapping locks held by this process on
1172          * fd's open on the same file and creates a
1173          * list of unlock ranges that will allow
1174          * POSIX lock ranges to remain on the file whilst the
1175          * unlocks are performed.
1176          */
1177
1178         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1179
1180         /*
1181          * Release the POSIX locks on the list of ranges returned.
1182          */
1183
1184         for(; ulist; ulist = ulist->next) {
1185                 offset = ulist->start;
1186                 count = ulist->size;
1187
1188                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1189                         (double)offset, (double)count ));
1190
1191                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1192                         ret = False;
1193         }
1194
1195         talloc_destroy(ul_ctx);
1196
1197         return ret;
1198 }
1199
1200 /****************************************************************************
1201  Remove all lock entries for a specific dev/inode pair from the tdb.
1202 ****************************************************************************/
1203
1204 static void delete_posix_lock_entries(files_struct *fsp)
1205 {
1206         TDB_DATA kbuf = locking_key_fsp(fsp);
1207
1208         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1209                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1210 }
1211
1212 /****************************************************************************
1213  Debug function.
1214 ****************************************************************************/
1215
1216 static void dump_entry(struct posix_lock *pl)
1217 {
1218         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1219                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1220 }
1221
1222 /****************************************************************************
1223  Remove any locks on this fd. Called from file_close().
1224 ****************************************************************************/
1225
1226 void posix_locking_close_file(files_struct *fsp)
1227 {
1228         struct posix_lock *entries = NULL;
1229         size_t count, i;
1230
1231         /*
1232          * Optimization for the common case where we are the only
1233          * opener of a file. If all fd entries are our own, we don't
1234          * need to explicitly release all the locks via the POSIX functions,
1235          * we can just remove all the entries in the tdb and allow the
1236          * close to remove the real locks.
1237          */
1238
1239         count = get_posix_lock_entries(fsp, &entries);
1240
1241         if (count == 0) {
1242                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1243                 return;
1244         }
1245
1246         for (i = 0; i < count; i++) {
1247                 if (entries[i].fd != fsp->fd )
1248                         break;
1249
1250                 dump_entry(&entries[i]);
1251         }
1252
1253         if (i == count) {
1254                 /* All locks are ours. */
1255                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1256                         fsp->fsp_name, (unsigned int)count ));
1257                 SAFE_FREE(entries);
1258                 delete_posix_lock_entries(fsp);
1259                 return;
1260         }
1261
1262         /*
1263          * Difficult case. We need to delete all our locks, whilst leaving
1264          * all other POSIX locks in place.
1265          */
1266
1267         for (i = 0; i < count; i++) {
1268                 struct posix_lock *pl = &entries[i];
1269                 if (pl->fd == fsp->fd)
1270                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1271         }
1272         SAFE_FREE(entries);
1273 }
1274
1275 /*******************************************************************
1276  Create the in-memory POSIX lock databases.
1277 ********************************************************************/
1278
1279 BOOL posix_locking_init(int read_only)
1280 {
1281         if (posix_lock_tdb && posix_pending_close_tdb)
1282                 return True;
1283
1284         if (!posix_lock_tdb)
1285                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1286                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1287         if (!posix_lock_tdb) {
1288                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1289                 return False;
1290         }
1291         if (!posix_pending_close_tdb)
1292                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1293                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1294         if (!posix_pending_close_tdb) {
1295                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1296                 return False;
1297         }
1298
1299         return True;
1300 }
1301
1302 /*******************************************************************
1303  Delete the in-memory POSIX lock databases.
1304 ********************************************************************/
1305
1306 BOOL posix_locking_end(void)
1307 {
1308     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1309                 return False;
1310     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1311                 return False;
1312         return True;
1313 }