source3/locking/posix.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 3.0
   4    Locking functions
   5    Copyright (C) Jeremy Allison 1992-2000
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21    Revision History:
  22
  23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  24 */
  25
  26 #include "includes.h"
  27 extern int DEBUGLEVEL;
  28
  29 /*
  30  * The POSIX locking database handle.
  31  */
  32
  33 static TDB_CONTEXT *posix_lock_tdb; /* keyed by dev/inode; each value is an array of struct posix_lock */
  34
  35 /*
  36  * The pending close database handle.
  37  */
  38
  39 static TDB_CONTEXT *posix_pending_close_tdb; /* keyed by dev/inode; each value is an array of int fds awaiting close */
  40
  41 /*
  42  * The data in POSIX lock records is an unsorted linear array of these
  43  * records.  It is unnecessary to store the count as tdb provides the
  44  * size of the record.
  45  */
  46
  47 struct posix_lock {
  48         int fd;                 /* descriptor the lock was taken on (copied from fsp->fd) */
  49         SMB_OFF_T start;        /* first byte of the locked range */
  50         SMB_OFF_T size;         /* length of the locked range in bytes */
  51         int lock_type;          /* lock type as stored by add_posix_lock_entry(); printed via posix_lock_type_name() */
  52 };
  53
  54 /*
  55  * The data in POSIX pending close records is an unsorted linear array of int
  56  * file descriptors.  It is unnecessary to store the count as tdb provides the
  57  * size of the record.
  58  */
  59
  60 /* The key used in both the POSIX databases. */
  61
  62 struct posix_lock_key {
  63         SMB_DEV_T device;       /* device number of the file (fsp->dev) */
  64         SMB_INO_T inode;        /* inode number of the file (fsp->inode) */
  65 };
  66
  67 /*******************************************************************
  68  Form a static locking key for a dev/inode pair.
  69 ******************************************************************/
  70
  71 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  72 {
  73         static struct posix_lock_key key;
  74         TDB_DATA kbuf;
  75
  76         memset(&key, '\0', sizeof(key));
  77         key.device = dev;
  78         key.inode = inode;
  79         kbuf.dptr = (char *)&key;
  80         kbuf.dsize = sizeof(key);
  81         return kbuf;
  82 }
  83
  84 /*******************************************************************
  85  Convenience function to get a key from an fsp.
  86 ******************************************************************/
  87
  88 static TDB_DATA locking_key_fsp(files_struct *fsp)
  89 {
  90         return locking_key(fsp->dev, fsp->inode);
  91 }
  92
  93 /****************************************************************************
  94  Add an fd to the pending close tdb.
  95 ****************************************************************************/
  96
  97 static BOOL add_fd_to_close_entry(files_struct *fsp)
  98 {
  99         TDB_DATA kbuf = locking_key_fsp(fsp);
 100         TDB_DATA dbuf;
 101         char *tp;
 102
 103         dbuf.dptr = NULL;
 104
 105         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 106
 107         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
 108         if (!tp) {
 109                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 110                 SAFE_FREE(dbuf.dptr);
 111                 return False;
 112         } else
 113                 dbuf.dptr = tp;
 114
 115         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
 116         dbuf.dsize += sizeof(int);
 117
 118         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 119                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 120         }
 121
 122         SAFE_FREE(dbuf.dptr);
 123         return True;
 124 }
 125
 126 /****************************************************************************
 127  Remove all fd entries for a specific dev/inode pair from the tdb.
 128 ****************************************************************************/
 129
 130 static void delete_close_entries(files_struct *fsp)
 131 {
 132         TDB_DATA kbuf = locking_key_fsp(fsp);
 133
 134         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 135                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 136 }
 137
 138 /****************************************************************************
 139  Get the array of POSIX pending close records for an open fsp. Caller must
 140  free. Returns number of entries.
 141 ****************************************************************************/
 142
 143 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 144 {
 145         TDB_DATA kbuf = locking_key_fsp(fsp);
 146         TDB_DATA dbuf;
 147         size_t count = 0;
 148
 149         *entries = NULL;
 150         dbuf.dptr = NULL;
 151
 152         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 153
 154     if (!dbuf.dptr) {
 155                 return 0;
 156         }
 157
 158         *entries = (int *)dbuf.dptr;
 159         count = (size_t)(dbuf.dsize / sizeof(int));
 160
 161         return count;
 162 }
 163
 164 /****************************************************************************
 165  Get the array of POSIX locks for an fsp. Caller must free. Returns
 166  number of entries.
 167 ****************************************************************************/
 168
 169 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 170 {
 171         TDB_DATA kbuf = locking_key_fsp(fsp);
 172         TDB_DATA dbuf;
 173         size_t count = 0;
 174
 175         *entries = NULL;
 176
 177         dbuf.dptr = NULL;
 178
 179         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 180
 181     if (!dbuf.dptr) {
 182                 return 0;
 183         }
 184
 185         *entries = (struct posix_lock *)dbuf.dptr;
 186         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 187
 188         return count;
 189 }
 190
 191 /****************************************************************************
 192  Deal with pending closes needed by POSIX locking support.
 193  Note that posix_locking_close_file() is expected to have been called
 194  to delete all locks on this fsp before this function is called.
 195 ****************************************************************************/
 196
 197 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 198 {
 199         int saved_errno = 0;
 200         int ret;
 201         size_t count, i;
 202         struct posix_lock *entries = NULL;
 203         int *fd_array = NULL;
 204         BOOL locks_on_other_fds = False;
 205
 206         if (!lp_posix_locking(SNUM(conn))) {
 207                 /*
 208                  * No POSIX to worry about, just close.
 209                  */
 210                 ret = conn->vfs_ops.close(fsp,fsp->fd);
 211                 fsp->fd = -1;
 212                 return ret;
 213         }
 214
 215         /*
 216          * Get the number of outstanding POSIX locks on this dev/inode pair.
 217          */
 218
 219         count = get_posix_lock_entries(fsp, &entries);
 220
 221         /*
 222          * Check if there are any outstanding locks belonging to
 223          * other fd's. This should never be the case if posix_locking_close_file()
 224          * has been called first, but it never hurts to be *sure*.
 225          */
 226
 227         for (i = 0; i < count; i++) {
 228                 if (entries[i].fd != fsp->fd) {
 229                         locks_on_other_fds = True;
 230                         break;
 231                 }
 232         }
 233
 234         if (locks_on_other_fds) {
 235
 236                 /*
 237                  * There are outstanding locks on this dev/inode pair on other fds.
 238                  * Add our fd to the pending close tdb and set fsp->fd to -1.
 239                  */
 240
 241                 if (!add_fd_to_close_entry(fsp)) {
 242                         SAFE_FREE(entries);
 243                         return False;
 244                 }
 245
 246                 SAFE_FREE(entries);
 247                 fsp->fd = -1;
 248                 return 0;
 249         }
 250
 251         SAFE_FREE(entries);
 252
 253         /*
 254          * No outstanding POSIX locks. Get the pending close fd's
 255          * from the tdb and close them all.
 256          */
 257
 258         count = get_posix_pending_close_entries(fsp, &fd_array);
 259
 260         if (count) {
 261                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 262
 263                 for(i = 0; i < count; i++) {
 264                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
 265                                 saved_errno = errno;
 266                         }
 267                 }
 268
 269                 /*
 270                  * Delete all fd's stored in the tdb
 271                  * for this dev/inode pair.
 272                  */
 273
 274                 delete_close_entries(fsp);
 275         }
 276
 277         SAFE_FREE(fd_array);
 278
 279         /*
 280          * Finally close the fd associated with this fsp.
 281          */
 282
 283         ret = conn->vfs_ops.close(fsp,fsp->fd);
 284
 285         if (saved_errno != 0) {
 286         errno = saved_errno;
 287                 ret = -1;
 288     }
 289
 290         fsp->fd = -1;
 291
 292         return ret;
 293 }
 294
 /****************************************************************************
  Debugging aid: printable name for an fcntl lock type. F_RDLCK maps to
  "READ"; every other value is reported as "WRITE".
 ****************************************************************************/

 static const char *posix_lock_type_name(int lock_type)
 {
         if (lock_type == F_RDLCK)
                 return "READ";

         return "WRITE";
 }
 303
 304 /****************************************************************************
 305  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 306  then the POSIX fcntl lock fails.
 307 ****************************************************************************/
 308
 309 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
 310 {
 311         TDB_DATA kbuf = locking_key_fsp(fsp);
 312         TDB_DATA dbuf;
 313         struct posix_lock *locks;
 314         size_t count;
 315
 316         dbuf.dptr = NULL;
 317
 318         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 319
 320         if (!dbuf.dptr) {
 321                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
 322                 goto fail;
 323         }
 324
 325         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 326         locks = (struct posix_lock *)dbuf.dptr;
 327
 328         if (count == 1) {
 329                 tdb_delete(posix_lock_tdb, kbuf);
 330         } else {
 331                 if (entry < count-1) {
 332                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
 333                 }
 334                 dbuf.dsize -= sizeof(*locks);
 335                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 336         }
 337
 338         SAFE_FREE(dbuf.dptr);
 339
 340         return True;
 341
 342  fail:
 343     SAFE_FREE(dbuf.dptr);
 344     return False;
 345 }
 346
 347 /****************************************************************************
 348  Add an entry into the POSIX locking tdb. We return the index number of the
 349  added lock (used in case we need to delete *exactly* this entry). Returns
 350  False on fail, True on success.
 351 ****************************************************************************/
 352
 353 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
 354 {
 355         TDB_DATA kbuf = locking_key_fsp(fsp);
 356         TDB_DATA dbuf;
 357         struct posix_lock pl;
 358         char *tp;
 359
 360         dbuf.dptr = NULL;
 361
 362         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 363
 364         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
 365
 366         /*
 367          * Add new record.
 368          */
 369
 370         pl.fd = fsp->fd;
 371         pl.start = start;
 372         pl.size = size;
 373         pl.lock_type = lock_type;
 374
 375         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
 376         if (!tp) {
 377                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 378                 goto fail;
 379         } else
 380                 dbuf.dptr = tp;
 381
 382         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
 383         dbuf.dsize += sizeof(pl);
 384
 385         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 386                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 387                 goto fail;
 388         }
 389
 390     SAFE_FREE(dbuf.dptr);
 391
 392         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
 393                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
 394                         (double)fsp->dev, (double)fsp->inode ));
 395
 396     return True;
 397
 398  fail:
 399     SAFE_FREE(dbuf.dptr);
 400     return False;
 401 }
 402
 403 /****************************************************************************
 404  Calculate if locks have any overlap at all.
 405 ****************************************************************************/
 406
 407 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
 408 {
 409         if (start1 >= start2 && start1 <= start2 + size2)
 410                 return True;
 411
 412         if (start1 < start2 && start1 + size1 > start2)
 413                 return True;
 414
 415         return False;
 416 }
 417
 418 /****************************************************************************
 419  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 420  deleted and the number of records that are overlapped by this one, or -1 on error.
 421 ****************************************************************************/
 422
 423 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 424 {
 425         TDB_DATA kbuf = locking_key_fsp(fsp);
 426         TDB_DATA dbuf;
 427         struct posix_lock *locks;
 428         size_t i, count;
 429         BOOL found = False;
 430         int num_overlapping_records = 0;
 431
 432         dbuf.dptr = NULL;
 433
 434         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 435
 436         if (!dbuf.dptr) {
 437                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 438                 goto fail;
 439         }
 440
 441         /* There are existing locks - find a match. */
 442         locks = (struct posix_lock *)dbuf.dptr;
 443         count = (size_t)(dbuf.dsize / sizeof(*locks));
 444
 445         /*
 446          * Search for and delete the first record that matches the
 447          * unlock criteria.
 448          */
 449
 450         for (i=0; i<count; i++) {
 451                 struct posix_lock *entry = &locks[i];
 452
 453                 if (entry->fd == fsp->fd &&
 454                         entry->start == start &&
 455                         entry->size == size) {
 456
 457                         /* Make a copy if requested. */
 458                         if (pl)
 459                                 *pl = *entry;
 460
 461                         /* Found it - delete it. */
 462                         if (count == 1) {
 463                                 tdb_delete(posix_lock_tdb, kbuf);
 464                         } else {
 465                                 if (i < count-1) {
 466                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
 467                                 }
 468                                 dbuf.dsize -= sizeof(*locks);
 469                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 470                         }
 471                         count--;
 472                         found = True;
 473                         break;
 474                 }
 475         }
 476
 477         if (!found)
 478                 goto fail;
 479
 480         /*
 481          * Count the number of entries that are
 482          * overlapped by this unlock request.
 483          */
 484
 485         for (i = 0; i < count; i++) {
 486                 struct posix_lock *entry = &locks[i];
 487
 488                 if (fsp->fd == entry->fd &&
 489                         does_lock_overlap( start, size, entry->start, entry->size))
 490                                 num_overlapping_records++;
 491         }
 492
 493         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 494                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
 495                                 (unsigned int)num_overlapping_records ));
 496
 497     SAFE_FREE(dbuf.dptr);
 498
 499         return num_overlapping_records;
 500
 501  fail:
 502     SAFE_FREE(dbuf.dptr);
 503     return -1;
 504 }
 505
 506 /****************************************************************************
 507  Utility function to map a lock type correctly depending on the open
 508  mode of a file.
 509 ****************************************************************************/
 510
 511 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 512 {
 513         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 514                 /*
 515                  * Many UNIX's cannot get a write lock on a file opened read-only.
 516                  * Win32 locking semantics allow this.
 517                  * Do the best we can and attempt a read-only lock.
 518                  */
 519                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 520                 return F_RDLCK;
 521         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
 522                 /*
 523                  * Ditto for read locks on write only files.
 524                  */
 525                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 526                 return F_WRLCK;
 527         }
 528
 529   /*
 530    * This return should be the most normal, as we attempt
 531    * to always open files read/write.
 532    */
 533
 534   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 535 }
 536
 537 /****************************************************************************
 538  Check to see if the given unsigned lock range is within the possible POSIX
 539  range. Modifies the given args to be in range if possible, just returns
 540  False if not.
 541 ****************************************************************************/
 542
 543 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 544                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 545 {
 546         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
 547         SMB_OFF_T count = (SMB_OFF_T)u_count;
 548
 549         /*
 550          * For the type of system we are, attempt to
 551          * find the maximum positive lock offset as an SMB_OFF_T.
 552          */
 553
 554 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 555
 556         /*
 557          * In this case SMB_OFF_T is 64 bits,
 558          * and the underlying system can handle 64 bit signed locks.
 559          */
 560
 561     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 562     SMB_OFF_T mask = (mask2<<1);
 563     SMB_OFF_T max_positive_lock_offset = ~mask;
 564
 565 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 566
 567         /*
 568          * In this case either SMB_OFF_T is 32 bits,
 569          * or the underlying system cannot handle 64 bit signed locks.
 570          * All offsets & counts must be 2^31 or less.
 571          */
 572
 573     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
 574
 575 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 576
 577         /*
 578          * POSIX locks of length zero mean lock to end-of-file.
 579          * Win32 locks of length zero are point probes. Ignore
 580          * any Win32 locks of length zero. JRA.
 581          */
 582
 583         if (count == (SMB_OFF_T)0) {
 584                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 585                 return False;
 586         }
 587
 588         /*
 589          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 590          * ignore this lock.
 591          */
 592
 593         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
 594                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 595                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
 596                 return False;
 597         }
 598
 599         /*
 600          * We must truncate the offset and count to less than max_positive_lock_offset.
 601          */
 602
 603         offset &= max_positive_lock_offset;
 604         count &= max_positive_lock_offset;
 605
 606
 607         /*
 608          * Deal with a very common case of count of all ones.
 609          * (lock entire file).
 610          */
 611
 612         if(count == (SMB_OFF_T)-1)
 613                 count = max_positive_lock_offset;
 614
 615         /*
 616          * Truncate count to end at max lock offset.
 617          */
 618
 619         if (offset + count < 0 || offset + count > max_positive_lock_offset)
 620                 count = max_positive_lock_offset - offset;
 621
 622         /*
 623          * If we ate all the count, ignore this lock.
 624          */
 625
 626         if (count == 0) {
 627                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 628                                 (double)u_offset, (double)u_count ));
 629                 return False;
 630         }
 631
 632         /*
 633          * The mapping was successful.
 634          */
 635
 636         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 637                         (double)offset, (double)count ));
 638
 639         *offset_out = offset;
 640         *count_out = count;
 641
 642         return True;
 643 }
 644
 645 /****************************************************************************
 646  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 647  broken NFS implementations.
 648 ****************************************************************************/
 649
 650 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 651 {
 652         int ret;
 653         struct connection_struct *conn = fsp->conn;
 654
 655         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
 656
 657         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 658
 659         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 660
 661                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 662                                         (double)offset,(double)count));
 663                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 664                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
 665
 666                 /*
 667                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 668                  * 32 bit NFS mounted filesystems. Just ignore it.
 669                  */
 670
 671                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
 672                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 673                         return True;
 674                 }
 675
 676                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
 677                         /* 32 bit NFS file system, retry with smaller offset */
 678                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 679                         errno = 0;
 680                         count &= 0x7fffffff;
 681                         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 682                 }
 683         }
 684
 685         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 686
 687         return ret;
 688 }
 689
 690 /****************************************************************************
 691  POSIX function to see if a file region is locked. Returns True if the
 692  region is locked, False otherwise.
 693 ****************************************************************************/
 694
 695 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 696 {
 697         SMB_OFF_T offset;
 698         SMB_OFF_T count;
 699         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 700
 701         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 702                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 703
 704         /*
 705          * If the requested lock won't fit in the POSIX range, we will
 706          * never set it, so presume it is not locked.
 707          */
 708
 709         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 710                 return False;
 711
 712         /*
 713          * Note that most UNIX's can *test* for a write lock on
 714          * a read-only fd, just not *set* a write lock on a read-only
 715          * fd. So we don't need to use map_lock_type here.
 716          */
 717
 718         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 719 }
 720
 721 /*
 722  * Structure used when splitting a lock range
 723  * into a POSIX lock range. Doubly linked list.
 724  */
 725
 726 struct lock_list {
 727     struct lock_list *next;    /* following range in the list, NULL at the tail */
 728     struct lock_list *prev;    /* preceding range in the list */
 729     SMB_OFF_T start;           /* first byte of this range */
 730     SMB_OFF_T size;            /* length of this range; start + size is one past its last byte */
 731 };
 732
 733 /****************************************************************************
 734  Create a list of lock ranges that don't overlap a given range. Used in calculating
 735  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 736  understand it :-).
 737 ****************************************************************************/
 738
 739 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
 740 {
 741         TDB_DATA kbuf = locking_key_fsp(fsp);
 742         TDB_DATA dbuf;
 743         struct posix_lock *locks;
 744         size_t num_locks, i;
 745
 746         dbuf.dptr = NULL;
 747
 748         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 749
 750         if (!dbuf.dptr)
 751                 return lhead;
 752
 753         locks = (struct posix_lock *)dbuf.dptr;
 754         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
 755
 756         /*
 757          * Check the current lock list on this dev/inode pair.
 758          * Quit if the list is deleted.
 759          */
 760
 761         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 762                 (double)lhead->start, (double)lhead->size ));
 763
 764         for (i=0; i<num_locks && lhead; i++) {
 765
 766                 struct posix_lock *lock = &locks[i];
 767                 struct lock_list *l_curr;
 768
 769                 /*
 770                  * Walk the lock list, checking for overlaps. Note that
 771                  * the lock list can expand within this loop if the current
 772                  * range being examined needs to be split.
 773                  */
 774
 775                 for (l_curr = lhead; l_curr;) {
 776
 777                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
 778                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 779
 780                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 781                                  (lock->start >= (l_curr->start + l_curr->size))) {
 782
 783                                 /* No overlap with this lock - leave this range alone. */
 784 /*********************************************
 785                                              +---------+
 786                                              | l_curr  |
 787                                              +---------+
 788                                 +-------+
 789                                 | lock  |
 790                                 +-------+
 791 OR....
 792              +---------+
 793              |  l_curr |
 794              +---------+
 795 **********************************************/
 796
 797                                 DEBUG(10,("no overlap case.\n" ));
 798
 799                                 l_curr = l_curr->next;
 800
 801                         } else if ( (l_curr->start >= lock->start) &&
 802                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 803
 804                                 /*
 805                                  * This unlock is completely overlapped by this existing lock range
 806                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 807                                  */
 808 /*********************************************
 809                 +---------+
 810                 |  l_curr |
 811                 +---------+
 812         +---------------------------+
 813         |       lock                |
 814         +---------------------------+
 815 **********************************************/
 816                                 /* Save the next pointer */
 817                                 struct lock_list *ul_next = l_curr->next;
 818
 819                                 DEBUG(10,("delete case.\n" ));
 820
 821                                 DLIST_REMOVE(lhead, l_curr);
 822                                 if(lhead == NULL)
 823                                         break; /* No more list... */
 824
 825                                 l_curr = ul_next;
 826
 827                         } else if ( (l_curr->start >= lock->start) &&
 828                                                 (l_curr->start < lock->start + lock->size) &&
 829                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 830
 831                                 /*
 832                                  * This unlock overlaps the existing lock range at the high end.
 833                                  * Truncate by moving start to existing range end and reducing size.
 834                                  */
 835 /*********************************************
 836                 +---------------+
 837                 |  l_curr       |
 838                 +---------------+
 839         +---------------+
 840         |    lock       |
 841         +---------------+
 842 BECOMES....
 843                         +-------+
 844                         | l_curr|
 845                         +-------+
 846 **********************************************/
 847
 848                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 849                                 l_curr->start = lock->start + lock->size;
 850
 851                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
 852                                                                 (double)l_curr->start, (double)l_curr->size ));
 853
 854                                 l_curr = l_curr->next;
 855
 856                         } else if ( (l_curr->start < lock->start) &&
 857                                                 (l_curr->start + l_curr->size > lock->start) &&
 858                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 859
 860                                 /*
 861                                  * This unlock overlaps the existing lock range at the low end.
 862                                  * Truncate by reducing size.
 863                                  */
 864 /*********************************************
 865    +---------------+
 866    |  l_curr       |
 867    +---------------+
 868            +---------------+
 869            |    lock       |
 870            +---------------+
 871 BECOMES....
 872    +-------+
 873    | l_curr|
 874    +-------+
 875 **********************************************/
 876
 877                                 l_curr->size = lock->start - l_curr->start;
 878
 879                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
 880                                                                 (double)l_curr->start, (double)l_curr->size ));
 881
 882                                 l_curr = l_curr->next;
 883
 884                         } else if ( (l_curr->start < lock->start) &&
 885                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 886                                 /*
 887                                  * Worst case scenario. Unlock request completely overlaps an existing
 888                                  * lock range. Split the request into two, push the new (upper) request
 889                                  * into the dlink list, and continue with the entry after ul_new (as we
 890                                  * know that ul_new will not overlap with this lock).
 891                                  */
 892 /*********************************************
 893         +---------------------------+
 894         |        l_curr             |
 895         +---------------------------+
 896                 +---------+
 897                 | lock    |
 898                 +---------+
 899 BECOMES.....
 900         +-------+         +---------+
 901         | l_curr|         | l_new   |
 902         +-------+         +---------+
 903 **********************************************/
 904                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
 905                                                                                                         sizeof(struct lock_list));
 906
 907                                 if(l_new == NULL) {
 908                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 909                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 910                                 }
 911
 912                                 ZERO_STRUCTP(l_new);
 913                                 l_new->start = lock->start + lock->size;
 914                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 915
 916                                 /* Truncate the l_curr. */
 917                                 l_curr->size = lock->start - l_curr->start;
 918
 919                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
 920 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 921                                                                 (double)l_new->start, (double)l_new->size ));
 922
 923                                 /*
 924                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 925                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
 926                                  */
 927
 928                                 l_new->prev = l_curr;
 929                                 l_new->next = l_curr->next;
 930                                 l_curr->next = l_new;
 931
 932                                 /* And move after the link we added. */
 933                                 l_curr = l_new->next;
 934
 935                         } else {
 936
 937                                 /*
 938                                  * This logic case should never happen. Ensure this is the
 939                                  * case by forcing an abort.... Remove in production.
 940                                  */
 941                                 pstring msg;
 942
 943                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 944 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
 945
 946                                 smb_panic(msg);
 947                         }
 948                 } /* end for ( l_curr = lhead; l_curr;) */
 949         } /* end for (i=0; i<num_locks && ul_head; i++) */
 950
 951         SAFE_FREE(dbuf.dptr);
 952
 953         return lhead;
 954 }
 955
 956 /****************************************************************************
 957  POSIX function to acquire a lock. Returns True if the
 958  lock could be granted, False if not.
 959 ****************************************************************************/
 960
 961 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 962 {
 963         SMB_OFF_T offset;
 964         SMB_OFF_T count;
 965         BOOL ret = True;
 966         size_t entry_num = 0;
 967         size_t lock_count;
 968         TALLOC_CTX *l_ctx = NULL;
 969         struct lock_list *llist = NULL;
 970         struct lock_list *ll = NULL;
 971         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 972
 973         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 974                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 975
 976         /*
 977          * If the requested lock won't fit in the POSIX range, we will
 978          * pretend it was successful.
 979          */
 980
 981         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 982                 return True;
 983
 984         /*
 985          * Windows is very strange. It allows read locks to be overlayed
 986          * (even over a write lock), but leaves the write lock in force until the first
 987          * unlock. It also reference counts the locks. This means the following sequence :
 988          *
 989          * process1                                      process2
 990          * ------------------------------------------------------------------------
 991          * WRITE LOCK : start = 2, len = 10
 992          *                                            READ LOCK: start =0, len = 10 - FAIL
 993          * READ LOCK : start = 0, len = 14
 994          *                                            READ LOCK: start =0, len = 10 - FAIL
 995          * UNLOCK : start = 2, len = 10
 996          *                                            READ LOCK: start =0, len = 10 - OK
 997          *
 998          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 999          * would leave a single read lock over the 0-14 region. In order to
1000          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1001          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1002          * semantics that if a write lock is added, then it will be first in the array.
1003          */
1004
1005         if ((l_ctx = talloc_init()) == NULL) {
1006                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1007                 return True; /* Not a fatal error. */
1008         }
1009
1010         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1011                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1012                 talloc_destroy(l_ctx);
1013                 return True; /* Not a fatal error. */
1014         }
1015
1016         /*
1017          * Create the initial list entry containing the
1018          * lock we want to add.
1019          */
1020
1021         ZERO_STRUCTP(ll);
1022         ll->start = offset;
1023         ll->size = count;
1024
1025         DLIST_ADD(llist, ll);
1026
1027         /*
1028          * The following call calculates if there are any
1029          * overlapping locks held by this process on
1030          * fd's open on the same file and splits this list
1031          * into a list of lock ranges that do not overlap with existing
1032          * POSIX locks.
1033          */
1034
1035         llist = posix_lock_list(l_ctx, llist, fsp);
1036
1037         /*
1038          * Now we have the list of ranges to lock it is safe to add the
1039          * entry into the POSIX lock tdb. We take note of the entry we
1040          * added here in case we have to remove it on POSIX lock fail.
1041          */
1042
1043         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1044                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1045                 talloc_destroy(l_ctx);
1046                 return False;
1047         }
1048
1049         /*
1050          * Add the POSIX locks on the list of ranges returned.
1051          * As the lock is supposed to be added atomically, we need to
1052          * back out all the locks if any one of these calls fail.
1053          */
1054
1055         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1056                 offset = ll->start;
1057                 count = ll->size;
1058
1059                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1060                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1061
1062                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1063                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1064                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1065                         ret = False;
1066                         break;
1067                 }
1068         }
1069
1070         if (!ret) {
1071
1072                 /*
1073                  * Back out all the POSIX locks we have on fail.
1074                  */
1075
1076                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1077                         offset = ll->start;
1078                         count = ll->size;
1079
1080                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1081                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1082
1083                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1084                 }
1085
1086                 /*
1087                  * Remove the tdb entry for this lock.
1088                  */
1089
1090                 delete_posix_lock_entry_by_index(fsp,entry_num);
1091         }
1092
1093         talloc_destroy(l_ctx);
1094         return ret;
1095 }
1096
1097 /****************************************************************************
1098  POSIX function to release a lock. Returns True if the
1099  lock could be released, False if not.
1100 ****************************************************************************/
1101
1102 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1103 {
1104         SMB_OFF_T offset;
1105         SMB_OFF_T count;
1106         BOOL ret = True;
1107         TALLOC_CTX *ul_ctx = NULL;
1108         struct lock_list *ulist = NULL;
1109         struct lock_list *ul = NULL;
1110         struct posix_lock deleted_lock;
1111         int num_overlapped_entries;
1112
1113         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1114                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1115
1116         /*
1117          * If the requested lock won't fit in the POSIX range, we will
1118          * pretend it was successful.
1119          */
1120
1121         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1122                 return True;
1123
1124         /*
1125          * We treat this as one unlock request for POSIX accounting purposes even
1126          * if it may later be split into multiple smaller POSIX unlock ranges.
1127          * num_overlapped_entries is the number of existing locks that have any
1128          * overlap with this unlock request.
1129          */
1130
1131         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1132
1133         if (num_overlapped_entries == -1) {
1134         smb_panic("release_posix_lock: unable find entry to delete !\n");
1135         }
1136
1137         /*
1138          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1139          * a POSIX write lock, then before doing the unlock we need to downgrade
1140          * the POSIX lock to a read lock. This allows any overlapping read locks
1141          * to be atomically maintained.
1142          */
1143
1144         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1145                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1146                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1147                         return False;
1148                 }
1149         }
1150
1151         if ((ul_ctx = talloc_init()) == NULL) {
1152                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1153                 return True; /* Not a fatal error. */
1154         }
1155
1156         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1157                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1158                 talloc_destroy(ul_ctx);
1159                 return True; /* Not a fatal error. */
1160         }
1161
1162         /*
1163          * Create the initial list entry containing the
1164          * lock we want to remove.
1165          */
1166
1167         ZERO_STRUCTP(ul);
1168         ul->start = offset;
1169         ul->size = count;
1170
1171         DLIST_ADD(ulist, ul);
1172
1173         /*
1174          * The following call calculates if there are any
1175          * overlapping locks held by this process on
1176          * fd's open on the same file and creates a
1177          * list of unlock ranges that will allow
1178          * POSIX lock ranges to remain on the file whilst the
1179          * unlocks are performed.
1180          */
1181
1182         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1183
1184         /*
1185          * Release the POSIX locks on the list of ranges returned.
1186          */
1187
1188         for(; ulist; ulist = ulist->next) {
1189                 offset = ulist->start;
1190                 count = ulist->size;
1191
1192                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1193                         (double)offset, (double)count ));
1194
1195                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1196                         ret = False;
1197         }
1198
1199         talloc_destroy(ul_ctx);
1200
1201         return ret;
1202 }
1203
1204 /****************************************************************************
1205  Remove all lock entries for a specific dev/inode pair from the tdb.
1206 ****************************************************************************/
1207
1208 static void delete_posix_lock_entries(files_struct *fsp)
1209 {
1210         TDB_DATA kbuf = locking_key_fsp(fsp);
1211
1212         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1213                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1214 }
1215
1216 /****************************************************************************
1217  Debug function.
1218 ****************************************************************************/
1219
1220 static void dump_entry(struct posix_lock *pl)
1221 {
1222         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1223                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1224 }
1225
1226 /****************************************************************************
1227  Remove any locks on this fd. Called from file_close().
1228 ****************************************************************************/
1229
1230 void posix_locking_close_file(files_struct *fsp)
1231 {
1232         struct posix_lock *entries = NULL;
1233         size_t count, i;
1234
1235         /*
1236          * Optimization for the common case where we are the only
1237          * opener of a file. If all fd entries are our own, we don't
1238          * need to explicitly release all the locks via the POSIX functions,
1239          * we can just remove all the entries in the tdb and allow the
1240          * close to remove the real locks.
1241          */
1242
1243         count = get_posix_lock_entries(fsp, &entries);
1244
1245         if (count == 0) {
1246                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1247                 return;
1248         }
1249
1250         for (i = 0; i < count; i++) {
1251                 if (entries[i].fd != fsp->fd )
1252                         break;
1253
1254                 dump_entry(&entries[i]);
1255         }
1256
1257         if (i == count) {
1258                 /* All locks are ours. */
1259                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1260                         fsp->fsp_name, (unsigned int)count ));
1261                 SAFE_FREE(entries);
1262                 delete_posix_lock_entries(fsp);
1263                 return;
1264         }
1265
1266         /*
1267          * Difficult case. We need to delete all our locks, whilst leaving
1268          * all other POSIX locks in place.
1269          */
1270
1271         for (i = 0; i < count; i++) {
1272                 struct posix_lock *pl = &entries[i];
1273                 if (pl->fd == fsp->fd)
1274                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1275         }
1276         SAFE_FREE(entries);
1277 }
1278
1279 /*******************************************************************
1280  Create the in-memory POSIX lock databases.
1281 ********************************************************************/
1282
1283 BOOL posix_locking_init(int read_only)
1284 {
1285         if (posix_lock_tdb && posix_pending_close_tdb)
1286                 return True;
1287
1288         if (!posix_lock_tdb)
1289                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1290                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1291         if (!posix_lock_tdb) {
1292                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1293                 return False;
1294         }
1295         if (!posix_pending_close_tdb)
1296                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1297                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1298         if (!posix_pending_close_tdb) {
1299                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1300                 return False;
1301         }
1302
1303         return True;
1304 }
1305
1306 /*******************************************************************
1307  Delete the in-memory POSIX lock databases.
1308 ********************************************************************/
1309
1310 BOOL posix_locking_end(void)
1311 {
1312     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1313                 return False;
1314     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1315                 return False;
1316         return True;
1317 }