source3/locking/posix.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 3.0
   4    Locking functions
   5    Copyright (C) Jeremy Allison 1992-2000
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21    Revision History:
  22
  23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  24 */
  25
  26 #include "includes.h"
  27 extern int DEBUGLEVEL;
  28
  29 /*
  30  * The POSIX locking database handle.
  31  */
  32
  33 static TDB_CONTEXT *posix_lock_tdb;
  34
  35 /*
  36  * The pending close database handle.
  37  */
  38
  39 static TDB_CONTEXT *posix_pending_close_tdb;
  40
  41 /*
  42  * The data in POSIX lock records is an unsorted linear array of these
  43  * records.  It is unnecessary to store the count as tdb provides the
  44  * size of the record.
  45  */
  46
  47 struct posix_lock {
  48         int fd;
  49         SMB_OFF_T start;
  50         SMB_OFF_T size;
  51         int lock_type;
  52 };
  53
  54 /*
  55  * The data in POSIX pending close records is an unsorted linear array of int
  56  * records.  It is unnecessary to store the count as tdb provides the
  57  * size of the record.
  58  */
  59
  60 /* The key used in both the POSIX databases. */
  61
  62 struct posix_lock_key {
  63         SMB_DEV_T device;
  64         SMB_INO_T inode;
  65 };
  66
  67 /*******************************************************************
  68  Form a static locking key for a dev/inode pair.
  69 ******************************************************************/
  70
  71 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  72 {
  73         static struct posix_lock_key key;
  74         TDB_DATA kbuf;
  75
  76         memset(&key, '\0', sizeof(key));
  77         key.device = dev;
  78         key.inode = inode;
  79         kbuf.dptr = (char *)&key;
  80         kbuf.dsize = sizeof(key);
  81         return kbuf;
  82 }
  83
  84 /*******************************************************************
  85  Convenience function to get a key from an fsp.
  86 ******************************************************************/
  87
  88 static TDB_DATA locking_key_fsp(files_struct *fsp)
  89 {
  90         return locking_key(fsp->dev, fsp->inode);
  91 }
  92
  93 /****************************************************************************
  94  Add an fd to the pending close tdb.
  95 ****************************************************************************/
  96
  97 static BOOL add_fd_to_close_entry(files_struct *fsp)
  98 {
  99         TDB_DATA kbuf = locking_key_fsp(fsp);
 100         TDB_DATA dbuf;
 101         char *tp;
 102
 103         dbuf.dptr = NULL;
 104
 105         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 106
 107         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
 108         if (!tp) {
 109                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 110                 if (dbuf.dptr)
 111                         free(dbuf.dptr);
 112                 return False;
 113         } else
 114                 dbuf.dptr = tp;
 115
 116         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
 117         dbuf.dsize += sizeof(int);
 118
 119         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 120                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 121         }
 122
 123         free(dbuf.dptr);
 124         return True;
 125 }
 126
 127 /****************************************************************************
 128  Remove all fd entries for a specific dev/inode pair from the tdb.
 129 ****************************************************************************/
 130
 131 static void delete_close_entries(files_struct *fsp)
 132 {
 133         TDB_DATA kbuf = locking_key_fsp(fsp);
 134
 135         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 136                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 137 }
 138
 139 /****************************************************************************
 140  Get the array of POSIX pending close records for an open fsp. Caller must
 141  free. Returns number of entries.
 142 ****************************************************************************/
 143
 144 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 145 {
 146         TDB_DATA kbuf = locking_key_fsp(fsp);
 147         TDB_DATA dbuf;
 148         size_t count = 0;
 149
 150         *entries = NULL;
 151         dbuf.dptr = NULL;
 152
 153         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 154
 155     if (!dbuf.dptr) {
 156                 return 0;
 157         }
 158
 159         *entries = (int *)dbuf.dptr;
 160         count = (size_t)(dbuf.dsize / sizeof(int));
 161
 162         return count;
 163 }
 164
 165 /****************************************************************************
 166  Get the array of POSIX locks for an fsp. Caller must free. Returns
 167  number of entries.
 168 ****************************************************************************/
 169
 170 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 171 {
 172         TDB_DATA kbuf = locking_key_fsp(fsp);
 173         TDB_DATA dbuf;
 174         size_t count = 0;
 175
 176         *entries = NULL;
 177
 178         dbuf.dptr = NULL;
 179
 180         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 181
 182     if (!dbuf.dptr) {
 183                 return 0;
 184         }
 185
 186         *entries = (struct posix_lock *)dbuf.dptr;
 187         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 188
 189         return count;
 190 }
 191
 192 /****************************************************************************
 193  Deal with pending closes needed by POSIX locking support.
 194  Note that posix_locking_close_file() is expected to have been called
 195  to delete all locks on this fsp before this function is called.
 196 ****************************************************************************/
 197
 198 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 199 {
 200         int saved_errno = 0;
 201         int ret;
 202         size_t count, i;
 203         struct posix_lock *entries = NULL;
 204         int *fd_array = NULL;
 205         BOOL locks_on_other_fds = False;
 206
 207         if (!lp_posix_locking(SNUM(conn))) {
 208                 /*
 209                  * No POSIX to worry about, just close.
 210                  */
 211                 ret = conn->vfs_ops.close(fsp,fsp->fd);
 212                 fsp->fd = -1;
 213                 return ret;
 214         }
 215
 216         /*
 217          * Get the number of outstanding POSIX locks on this dev/inode pair.
 218          */
 219
 220         count = get_posix_lock_entries(fsp, &entries);
 221
 222         /*
 223          * Check if there are any outstanding locks belonging to
 224          * other fd's. This should never be the case if posix_locking_close_file()
 225          * has been called first, but it never hurts to be *sure*.
 226          */
 227
 228         for (i = 0; i < count; i++) {
 229                 if (entries[i].fd != fsp->fd) {
 230                         locks_on_other_fds = True;
 231                         break;
 232                 }
 233         }
 234
 235         if (locks_on_other_fds) {
 236
 237                 /*
 238                  * There are outstanding locks on this dev/inode pair on other fds.
 239                  * Add our fd to the pending close tdb and set fsp->fd to -1.
 240                  */
 241
 242                 if (!add_fd_to_close_entry(fsp)) {
 243                         free((char *)entries);
 244                         return False;
 245                 }
 246
 247                 free((char *)entries);
 248                 fsp->fd = -1;
 249                 return 0;
 250         }
 251
 252         if(entries)
 253                 free((char *)entries);
 254
 255         /*
 256          * No outstanding POSIX locks. Get the pending close fd's
 257          * from the tdb and close them all.
 258          */
 259
 260         count = get_posix_pending_close_entries(fsp, &fd_array);
 261
 262         if (count) {
 263                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 264
 265                 for(i = 0; i < count; i++) {
 266                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
 267                                 saved_errno = errno;
 268                         }
 269                 }
 270
 271                 /*
 272                  * Delete all fd's stored in the tdb
 273                  * for this dev/inode pair.
 274                  */
 275
 276                 delete_close_entries(fsp);
 277         }
 278
 279         if (fd_array)
 280                 free((char *)fd_array);
 281
 282         /*
 283          * Finally close the fd associated with this fsp.
 284          */
 285
 286         ret = conn->vfs_ops.close(fsp,fsp->fd);
 287
 288         if (saved_errno != 0) {
 289         errno = saved_errno;
 290                 ret = -1;
 291     }
 292
 293         fsp->fd = -1;
 294
 295         return ret;
 296 }
 297
 298 /****************************************************************************
 299  Debugging aid :-).
 300 ****************************************************************************/
 301
 302 static const char *posix_lock_type_name(int lock_type)
 303 {
 304         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
 305 }
 306
 307 /****************************************************************************
 308  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
 309  then the POSIX fcntl lock fails.
 310 ****************************************************************************/
 311
 312 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
 313 {
 314         TDB_DATA kbuf = locking_key_fsp(fsp);
 315         TDB_DATA dbuf;
 316         struct posix_lock *locks;
 317         size_t count;
 318
 319         dbuf.dptr = NULL;
 320
 321         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 322
 323         if (!dbuf.dptr) {
 324                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
 325                 goto fail;
 326         }
 327
 328         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 329         locks = (struct posix_lock *)dbuf.dptr;
 330
 331         if (count == 1) {
 332                 tdb_delete(posix_lock_tdb, kbuf);
 333         } else {
 334                 if (entry < count-1) {
 335                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
 336                 }
 337                 dbuf.dsize -= sizeof(*locks);
 338                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 339         }
 340
 341         free(dbuf.dptr);
 342
 343         return True;
 344
 345  fail:
 346     if (dbuf.dptr)
 347                 free(dbuf.dptr);
 348     return False;
 349 }
 350
 351 /****************************************************************************
 352  Add an entry into the POSIX locking tdb. We return the index number of the
 353  added lock (used in case we need to delete *exactly* this entry). Returns
 354  False on fail, True on success.
 355 ****************************************************************************/
 356
 357 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
 358 {
 359         TDB_DATA kbuf = locking_key_fsp(fsp);
 360         TDB_DATA dbuf;
 361         struct posix_lock pl;
 362         char *tp;
 363
 364         dbuf.dptr = NULL;
 365
 366         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 367
 368         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
 369
 370         /*
 371          * Add new record.
 372          */
 373
 374         pl.fd = fsp->fd;
 375         pl.start = start;
 376         pl.size = size;
 377         pl.lock_type = lock_type;
 378
 379         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
 380         if (!tp) {
 381                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 382                 goto fail;
 383         } else
 384                 dbuf.dptr = tp;
 385
 386         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
 387         dbuf.dsize += sizeof(pl);
 388
 389         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 390                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 391                 goto fail;
 392         }
 393
 394     free(dbuf.dptr);
 395
 396         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
 397                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
 398                         (double)fsp->dev, (double)fsp->inode ));
 399
 400     return True;
 401
 402  fail:
 403     if (dbuf.dptr)
 404                 free(dbuf.dptr);
 405     return False;
 406 }
 407
 408 /****************************************************************************
 409  Calculate if locks have any overlap at all.
 410 ****************************************************************************/
 411
 412 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
 413 {
 414         if (start1 >= start2 && start1 <= start2 + size2)
 415                 return True;
 416
 417         if (start1 < start2 && start1 + size1 > start2)
 418                 return True;
 419
 420         return False;
 421 }
 422
 423 /****************************************************************************
 424  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 425  deleted and the number of records that are overlapped by this one, or -1 on error.
 426 ****************************************************************************/
 427
 428 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 429 {
 430         TDB_DATA kbuf = locking_key_fsp(fsp);
 431         TDB_DATA dbuf;
 432         struct posix_lock *locks;
 433         size_t i, count;
 434         BOOL found = False;
 435         int num_overlapping_records = 0;
 436
 437         dbuf.dptr = NULL;
 438
 439         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 440
 441         if (!dbuf.dptr) {
 442                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 443                 goto fail;
 444         }
 445
 446         /* There are existing locks - find a match. */
 447         locks = (struct posix_lock *)dbuf.dptr;
 448         count = (size_t)(dbuf.dsize / sizeof(*locks));
 449
 450         /*
 451          * Search for and delete the first record that matches the
 452          * unlock criteria.
 453          */
 454
 455         for (i=0; i<count; i++) {
 456                 struct posix_lock *entry = &locks[i];
 457
 458                 if (entry->fd == fsp->fd &&
 459                         entry->start == start &&
 460                         entry->size == size) {
 461
 462                         /* Make a copy if requested. */
 463                         if (pl)
 464                                 *pl = *entry;
 465
 466                         /* Found it - delete it. */
 467                         if (count == 1) {
 468                                 tdb_delete(posix_lock_tdb, kbuf);
 469                         } else {
 470                                 if (i < count-1) {
 471                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
 472                                 }
 473                                 dbuf.dsize -= sizeof(*locks);
 474                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 475                         }
 476                         count--;
 477                         found = True;
 478                         break;
 479                 }
 480         }
 481
 482         if (!found)
 483                 goto fail;
 484
 485         /*
 486          * Count the number of entries that are
 487          * overlapped by this unlock request.
 488          */
 489
 490         for (i = 0; i < count; i++) {
 491                 struct posix_lock *entry = &locks[i];
 492
 493                 if (fsp->fd == entry->fd &&
 494                         does_lock_overlap( start, size, entry->start, entry->size))
 495                                 num_overlapping_records++;
 496         }
 497
 498         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 499                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
 500                                 (unsigned int)num_overlapping_records ));
 501
 502     if (dbuf.dptr)
 503                 free(dbuf.dptr);
 504
 505         return num_overlapping_records;
 506
 507  fail:
 508     if (dbuf.dptr)
 509                 free(dbuf.dptr);
 510     return -1;
 511 }
 512
 513 /****************************************************************************
 514  Utility function to map a lock type correctly depending on the open
 515  mode of a file.
 516 ****************************************************************************/
 517
 518 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 519 {
 520         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 521                 /*
 522                  * Many UNIX's cannot get a write lock on a file opened read-only.
 523                  * Win32 locking semantics allow this.
 524                  * Do the best we can and attempt a read-only lock.
 525                  */
 526                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 527                 return F_RDLCK;
 528         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
 529                 /*
 530                  * Ditto for read locks on write only files.
 531                  */
 532                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 533                 return F_WRLCK;
 534         }
 535
 536   /*
 537    * This return should be the most normal, as we attempt
 538    * to always open files read/write.
 539    */
 540
 541   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 542 }
 543
 544 /****************************************************************************
 545  Check to see if the given unsigned lock range is within the possible POSIX
 546  range. Modifies the given args to be in range if possible, just returns
 547  False if not.
 548 ****************************************************************************/
 549
 550 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 551                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 552 {
 553         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
 554         SMB_OFF_T count = (SMB_OFF_T)u_count;
 555
 556         /*
 557          * For the type of system we are, attempt to
 558          * find the maximum positive lock offset as an SMB_OFF_T.
 559          */
 560
 561 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 562
 563         /*
 564          * In this case SMB_OFF_T is 64 bits,
 565          * and the underlying system can handle 64 bit signed locks.
 566          */
 567
 568     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 569     SMB_OFF_T mask = (mask2<<1);
 570     SMB_OFF_T max_positive_lock_offset = ~mask;
 571
 572 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 573
 574         /*
 575          * In this case either SMB_OFF_T is 32 bits,
 576          * or the underlying system cannot handle 64 bit signed locks.
 577          * All offsets & counts must be 2^31 or less.
 578          */
 579
 580     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
 581
 582 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 583
 584         /*
 585          * POSIX locks of length zero mean lock to end-of-file.
 586          * Win32 locks of length zero are point probes. Ignore
 587          * any Win32 locks of length zero. JRA.
 588          */
 589
 590         if (count == (SMB_OFF_T)0) {
 591                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
 592                 return False;
 593         }
 594
 595         /*
 596          * If the given offset was > max_positive_lock_offset then we cannot map this at all
 597          * ignore this lock.
 598          */
 599
 600         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
 601                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
 602                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
 603                 return False;
 604         }
 605
 606         /*
 607          * We must truncate the offset and count to less than max_positive_lock_offset.
 608          */
 609
 610         offset &= max_positive_lock_offset;
 611         count &= max_positive_lock_offset;
 612
 613
 614         /*
 615          * Deal with a very common case of count of all ones.
 616          * (lock entire file).
 617          */
 618
 619         if(count == (SMB_OFF_T)-1)
 620                 count = max_positive_lock_offset;
 621
 622         /*
 623          * Truncate count to end at max lock offset.
 624          */
 625
 626         if (offset + count < 0 || offset + count > max_positive_lock_offset)
 627                 count = max_positive_lock_offset - offset;
 628
 629         /*
 630          * If we ate all the count, ignore this lock.
 631          */
 632
 633         if (count == 0) {
 634                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
 635                                 (double)u_offset, (double)u_count ));
 636                 return False;
 637         }
 638
 639         /*
 640          * The mapping was successful.
 641          */
 642
 643         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 644                         (double)offset, (double)count ));
 645
 646         *offset_out = offset;
 647         *count_out = count;
 648
 649         return True;
 650 }
 651
 652 /****************************************************************************
 653  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 654  broken NFS implementations.
 655 ****************************************************************************/
 656
 657 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 658 {
 659         int ret;
 660         struct connection_struct *conn = fsp->conn;
 661
 662         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
 663
 664         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 665
 666         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
 667
 668                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
 669                                         (double)offset,(double)count));
 670                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
 671                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
 672
 673                 /*
 674                  * If the offset is > 0x7FFFFFFF then this will cause problems on
 675                  * 32 bit NFS mounted filesystems. Just ignore it.
 676                  */
 677
 678                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
 679                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
 680                         return True;
 681                 }
 682
 683                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
 684                         /* 32 bit NFS file system, retry with smaller offset */
 685                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
 686                         errno = 0;
 687                         count &= 0x7fffffff;
 688                         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
 689                 }
 690         }
 691
 692         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
 693
 694         return ret;
 695 }
 696
 697 /****************************************************************************
 698  POSIX function to see if a file region is locked. Returns True if the
 699  region is locked, False otherwise.
 700 ****************************************************************************/
 701
 702 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 703 {
 704         SMB_OFF_T offset;
 705         SMB_OFF_T count;
 706         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 707
 708         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 709                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 710
 711         /*
 712          * If the requested lock won't fit in the POSIX range, we will
 713          * never set it, so presume it is not locked.
 714          */
 715
 716         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 717                 return False;
 718
 719         /*
 720          * Note that most UNIX's can *test* for a write lock on
 721          * a read-only fd, just not *set* a write lock on a read-only
 722          * fd. So we don't need to use map_lock_type here.
 723          */
 724
 725         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 726 }
 727
 728 /*
 729  * Structure used when splitting a lock range
 730  * into a POSIX lock range. Doubly linked list.
 731  */
 732
 733 struct lock_list {
 734     struct lock_list *next;
 735     struct lock_list *prev;
 736     SMB_OFF_T start;
 737     SMB_OFF_T size;
 738 };
 739
 740 /****************************************************************************
 741  Create a list of lock ranges that don't overlap a given range. Used in calculating
 742  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
 743  understand it :-).
 744 ****************************************************************************/
 745
 746 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
 747 {
 748         TDB_DATA kbuf = locking_key_fsp(fsp);
 749         TDB_DATA dbuf;
 750         struct posix_lock *locks;
 751         size_t num_locks, i;
 752
 753         dbuf.dptr = NULL;
 754
 755         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 756
 757         if (!dbuf.dptr)
 758                 return lhead;
 759
 760         locks = (struct posix_lock *)dbuf.dptr;
 761         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
 762
 763         /*
 764          * Check the current lock list on this dev/inode pair.
 765          * Quit if the list is deleted.
 766          */
 767
 768         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
 769                 (double)lhead->start, (double)lhead->size ));
 770
 771         for (i=0; i<num_locks && lhead; i++) {
 772
 773                 struct posix_lock *lock = &locks[i];
 774                 struct lock_list *l_curr;
 775
 776                 /*
 777                  * Walk the lock list, checking for overlaps. Note that
 778                  * the lock list can expand within this loop if the current
 779                  * range being examined needs to be split.
 780                  */
 781
 782                 for (l_curr = lhead; l_curr;) {
 783
 784                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
 785                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
 786
 787                         if ( (l_curr->start >= (lock->start + lock->size)) ||
 788                                  (lock->start >= (l_curr->start + l_curr->size))) {
 789
 790                                 /* No overlap with this lock - leave this range alone. */
 791 /*********************************************
 792                                              +---------+
 793                                              | l_curr  |
 794                                              +---------+
 795                                 +-------+
 796                                 | lock  |
 797                                 +-------+
 798 OR....
 799              +---------+
 800              |  l_curr |
 801              +---------+
 802 **********************************************/
 803
 804                                 DEBUG(10,("no overlap case.\n" ));
 805
 806                                 l_curr = l_curr->next;
 807
 808                         } else if ( (l_curr->start >= lock->start) &&
 809                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 810
 811                                 /*
 812                                  * This unlock is completely overlapped by this existing lock range
 813                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 814                                  */
 815 /*********************************************
 816                 +---------+
 817                 |  l_curr |
 818                 +---------+
 819         +---------------------------+
 820         |       lock                |
 821         +---------------------------+
 822 **********************************************/
 823                                 /* Save the next pointer */
 824                                 struct lock_list *ul_next = l_curr->next;
 825
 826                                 DEBUG(10,("delete case.\n" ));
 827
 828                                 DLIST_REMOVE(lhead, l_curr);
 829                                 if(lhead == NULL)
 830                                         break; /* No more list... */
 831
 832                                 l_curr = ul_next;
 833
 834                         } else if ( (l_curr->start >= lock->start) &&
 835                                                 (l_curr->start < lock->start + lock->size) &&
 836                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 837
 838                                 /*
 839                                  * This unlock overlaps the existing lock range at the high end.
 840                                  * Truncate by moving start to existing range end and reducing size.
 841                                  */
 842 /*********************************************
 843                 +---------------+
 844                 |  l_curr       |
 845                 +---------------+
 846         +---------------+
 847         |    lock       |
 848         +---------------+
 849 BECOMES....
 850                         +-------+
 851                         | l_curr|
 852                         +-------+
 853 **********************************************/
 854
 855                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
 856                                 l_curr->start = lock->start + lock->size;
 857
 858                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
 859                                                                 (double)l_curr->start, (double)l_curr->size ));
 860
 861                                 l_curr = l_curr->next;
 862
 863                         } else if ( (l_curr->start < lock->start) &&
 864                                                 (l_curr->start + l_curr->size > lock->start) &&
 865                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
 866
 867                                 /*
 868                                  * This unlock overlaps the existing lock range at the low end.
 869                                  * Truncate by reducing size.
 870                                  */
 871 /*********************************************
 872    +---------------+
 873    |  l_curr       |
 874    +---------------+
 875            +---------------+
 876            |    lock       |
 877            +---------------+
 878 BECOMES....
 879    +-------+
 880    | l_curr|
 881    +-------+
 882 **********************************************/
 883
 884                                 l_curr->size = lock->start - l_curr->start;
 885
 886                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
 887                                                                 (double)l_curr->start, (double)l_curr->size ));
 888
 889                                 l_curr = l_curr->next;
 890
 891                         } else if ( (l_curr->start < lock->start) &&
 892                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
 893                                 /*
 894                                  * Worst case scenario. Unlock request completely overlaps an existing
 895                                  * lock range. Split the request into two, push the new (upper) request
 896                                  * into the dlink list, and continue with the entry after ul_new (as we
 897                                  * know that ul_new will not overlap with this lock).
 898                                  */
 899 /*********************************************
 900         +---------------------------+
 901         |        l_curr             |
 902         +---------------------------+
 903                 +---------+
 904                 | lock    |
 905                 +---------+
 906 BECOMES.....
 907         +-------+         +---------+
 908         | l_curr|         | l_new   |
 909         +-------+         +---------+
 910 **********************************************/
 911                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
 912                                                                                                         sizeof(struct lock_list));
 913
 914                                 if(l_new == NULL) {
 915                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
 916                                         return NULL; /* The talloc_destroy takes care of cleanup. */
 917                                 }
 918
 919                                 ZERO_STRUCTP(l_new);
 920                                 l_new->start = lock->start + lock->size;
 921                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
 922
 923                                 /* Truncate the l_curr. */
 924                                 l_curr->size = lock->start - l_curr->start;
 925
 926                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
 927 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
 928                                                                 (double)l_new->start, (double)l_new->size ));
 929
 930                                 /*
 931                                  * Add into the dlink list after the l_curr point - NOT at lhead.
 932                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
 933                                  */
 934
 935                                 l_new->prev = l_curr;
 936                                 l_new->next = l_curr->next;
 937                                 l_curr->next = l_new;
 938
 939                                 /* And move after the link we added. */
 940                                 l_curr = l_new->next;
 941
 942                         } else {
 943
 944                                 /*
 945                                  * This logic case should never happen. Ensure this is the
 946                                  * case by forcing an abort.... Remove in production.
 947                                  */
 948                                 pstring msg;
 949
 950                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
 951 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
 952
 953                                 smb_panic(msg);
 954                         }
 955                 } /* end for ( l_curr = lhead; l_curr;) */
 956         } /* end for (i=0; i<num_locks && ul_head; i++) */
 957
 958         if (dbuf.dptr)
 959                 free(dbuf.dptr);
 960
 961         return lhead;
 962 }
 963
 964 /****************************************************************************
 965  POSIX function to acquire a lock. Returns True if the
 966  lock could be granted, False if not.
 967 ****************************************************************************/
 968
 969 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 970 {
 971         SMB_OFF_T offset;
 972         SMB_OFF_T count;
 973         BOOL ret = True;
 974         size_t entry_num = 0;
 975         size_t lock_count;
 976         TALLOC_CTX *l_ctx = NULL;
 977         struct lock_list *llist = NULL;
 978         struct lock_list *ll = NULL;
 979         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 980
 981         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 982                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 983
 984         /*
 985          * If the requested lock won't fit in the POSIX range, we will
 986          * pretend it was successful.
 987          */
 988
 989         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 990                 return True;
 991
 992         /*
 993          * Windows is very strange. It allows read locks to be overlayed
 994          * (even over a write lock), but leaves the write lock in force until the first
 995          * unlock. It also reference counts the locks. This means the following sequence :
 996          *
 997          * process1                                      process2
 998          * ------------------------------------------------------------------------
 999          * WRITE LOCK : start = 2, len = 10
1000          *                                            READ LOCK: start =0, len = 10 - FAIL
1001          * READ LOCK : start = 0, len = 14
1002          *                                            READ LOCK: start =0, len = 10 - FAIL
1003          * UNLOCK : start = 2, len = 10
1004          *                                            READ LOCK: start =0, len = 10 - OK
1005          *
1006          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1007          * would leave a single read lock over the 0-14 region. In order to
1008          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1009          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1010          * semantics that if a write lock is added, then it will be first in the array.
1011          */
1012
1013         if ((l_ctx = talloc_init()) == NULL) {
1014                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1015                 return True; /* Not a fatal error. */
1016         }
1017
1018         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1019                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1020                 talloc_destroy(l_ctx);
1021                 return True; /* Not a fatal error. */
1022         }
1023
1024         /*
1025          * Create the initial list entry containing the
1026          * lock we want to add.
1027          */
1028
1029         ZERO_STRUCTP(ll);
1030         ll->start = offset;
1031         ll->size = count;
1032
1033         DLIST_ADD(llist, ll);
1034
1035         /*
1036          * The following call calculates if there are any
1037          * overlapping locks held by this process on
1038          * fd's open on the same file and splits this list
1039          * into a list of lock ranges that do not overlap with existing
1040          * POSIX locks.
1041          */
1042
1043         llist = posix_lock_list(l_ctx, llist, fsp);
1044
1045         /*
1046          * Now we have the list of ranges to lock it is safe to add the
1047          * entry into the POSIX lock tdb. We take note of the entry we
1048          * added here in case we have to remove it on POSIX lock fail.
1049          */
1050
1051         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1052                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1053                 talloc_destroy(l_ctx);
1054                 return False;
1055         }
1056
1057         /*
1058          * Add the POSIX locks on the list of ranges returned.
1059          * As the lock is supposed to be added atomically, we need to
1060          * back out all the locks if any one of these calls fail.
1061          */
1062
1063         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1064                 offset = ll->start;
1065                 count = ll->size;
1066
1067                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1068                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1069
1070                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1071                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1072                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1073                         ret = False;
1074                         break;
1075                 }
1076         }
1077
1078         if (!ret) {
1079
1080                 /*
1081                  * Back out all the POSIX locks we have on fail.
1082                  */
1083
1084                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1085                         offset = ll->start;
1086                         count = ll->size;
1087
1088                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1089                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1090
1091                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1092                 }
1093
1094                 /*
1095                  * Remove the tdb entry for this lock.
1096                  */
1097
1098                 delete_posix_lock_entry_by_index(fsp,entry_num);
1099         }
1100
1101         talloc_destroy(l_ctx);
1102         return ret;
1103 }
1104
1105 /****************************************************************************
1106  POSIX function to release a lock. Returns True if the
1107  lock could be released, False if not.
1108 ****************************************************************************/
1109
1110 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1111 {
1112         SMB_OFF_T offset;
1113         SMB_OFF_T count;
1114         BOOL ret = True;
1115         TALLOC_CTX *ul_ctx = NULL;
1116         struct lock_list *ulist = NULL;
1117         struct lock_list *ul = NULL;
1118         struct posix_lock deleted_lock;
1119         int num_overlapped_entries;
1120
1121         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1122                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1123
1124         /*
1125          * If the requested lock won't fit in the POSIX range, we will
1126          * pretend it was successful.
1127          */
1128
1129         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1130                 return True;
1131
1132         /*
1133          * We treat this as one unlock request for POSIX accounting purposes even
1134          * if it may later be split into multiple smaller POSIX unlock ranges.
1135          * num_overlapped_entries is the number of existing locks that have any
1136          * overlap with this unlock request.
1137          */
1138
1139         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1140
1141         if (num_overlapped_entries == -1) {
1142         smb_panic("release_posix_lock: unable find entry to delete !\n");
1143         }
1144
1145         /*
1146          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1147          * a POSIX write lock, then before doing the unlock we need to downgrade
1148          * the POSIX lock to a read lock. This allows any overlapping read locks
1149          * to be atomically maintained.
1150          */
1151
1152         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1153                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1154                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1155                         return False;
1156                 }
1157         }
1158
1159         if ((ul_ctx = talloc_init()) == NULL) {
1160                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1161                 return True; /* Not a fatal error. */
1162         }
1163
1164         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1165                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1166                 talloc_destroy(ul_ctx);
1167                 return True; /* Not a fatal error. */
1168         }
1169
1170         /*
1171          * Create the initial list entry containing the
1172          * lock we want to remove.
1173          */
1174
1175         ZERO_STRUCTP(ul);
1176         ul->start = offset;
1177         ul->size = count;
1178
1179         DLIST_ADD(ulist, ul);
1180
1181         /*
1182          * The following call calculates if there are any
1183          * overlapping locks held by this process on
1184          * fd's open on the same file and creates a
1185          * list of unlock ranges that will allow
1186          * POSIX lock ranges to remain on the file whilst the
1187          * unlocks are performed.
1188          */
1189
1190         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1191
1192         /*
1193          * Release the POSIX locks on the list of ranges returned.
1194          */
1195
1196         for(; ulist; ulist = ulist->next) {
1197                 offset = ulist->start;
1198                 count = ulist->size;
1199
1200                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1201                         (double)offset, (double)count ));
1202
1203                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1204                         ret = False;
1205         }
1206
1207         talloc_destroy(ul_ctx);
1208
1209         return ret;
1210 }
1211
1212 /****************************************************************************
1213  Remove all lock entries for a specific dev/inode pair from the tdb.
1214 ****************************************************************************/
1215
1216 static void delete_posix_lock_entries(files_struct *fsp)
1217 {
1218         TDB_DATA kbuf = locking_key_fsp(fsp);
1219
1220         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1221                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1222 }
1223
1224 /****************************************************************************
1225  Debug function.
1226 ****************************************************************************/
1227
1228 static void dump_entry(struct posix_lock *pl)
1229 {
1230         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1231                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1232 }
1233
1234 /****************************************************************************
1235  Remove any locks on this fd. Called from file_close().
1236 ****************************************************************************/
1237
1238 void posix_locking_close_file(files_struct *fsp)
1239 {
1240         struct posix_lock *entries = NULL;
1241         size_t count, i;
1242
1243         /*
1244          * Optimization for the common case where we are the only
1245          * opener of a file. If all fd entries are our own, we don't
1246          * need to explicitly release all the locks via the POSIX functions,
1247          * we can just remove all the entries in the tdb and allow the
1248          * close to remove the real locks.
1249          */
1250
1251         count = get_posix_lock_entries(fsp, &entries);
1252
1253         if (count == 0) {
1254                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1255                 return;
1256         }
1257
1258         for (i = 0; i < count; i++) {
1259                 if (entries[i].fd != fsp->fd )
1260                         break;
1261
1262                 dump_entry(&entries[i]);
1263         }
1264
1265         if (i == count) {
1266                 /* All locks are ours. */
1267                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1268                         fsp->fsp_name, (unsigned int)count ));
1269                 free((char *)entries);
1270                 delete_posix_lock_entries(fsp);
1271                 return;
1272         }
1273
1274         /*
1275          * Difficult case. We need to delete all our locks, whilst leaving
1276          * all other POSIX locks in place.
1277          */
1278
1279         for (i = 0; i < count; i++) {
1280                 struct posix_lock *pl = &entries[i];
1281                 if (pl->fd == fsp->fd)
1282                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1283         }
1284         free((char *)entries);
1285 }
1286
1287 /*******************************************************************
1288  Create the in-memory POSIX lock databases.
1289 ********************************************************************/
1290
1291 BOOL posix_locking_init(int read_only)
1292 {
1293         if (posix_lock_tdb && posix_pending_close_tdb)
1294                 return True;
1295
1296         if (!posix_lock_tdb)
1297                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1298                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1299         if (!posix_lock_tdb) {
1300                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1301                 return False;
1302         }
1303         if (!posix_pending_close_tdb)
1304                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1305                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1306         if (!posix_pending_close_tdb) {
1307                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1308                 return False;
1309         }
1310
1311         return True;
1312 }
1313
1314 /*******************************************************************
1315  Delete the in-memory POSIX lock databases.
1316 ********************************************************************/
1317
1318 BOOL posix_locking_end(void)
1319 {
1320     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1321                 return False;
1322     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1323                 return False;
1324         return True;
1325 }