source3/locking/posix.c

   1 /*
   2    Unix SMB/Netbios implementation.
   3    Version 3.0
   4    Locking functions
   5    Copyright (C) Jeremy Allison 1992-2000
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20
  21    Revision History:
  22
  23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
  24 */
  25
  26 #include "includes.h"
  27 extern int DEBUGLEVEL;
  28 extern int global_smbpid;
  29
/*
 * The POSIX locking database handle.
 * Records are keyed by a dev/inode pair (struct posix_lock_key) and hold
 * an array of struct posix_lock entries.
 */

static TDB_CONTEXT *posix_lock_tdb;

/*
 * The pending close database handle.
 * Records are keyed by a dev/inode pair (struct posix_lock_key) and hold
 * an array of int file descriptors whose close has been deferred.
 */

static TDB_CONTEXT *posix_pending_close_tdb;
  41
/*
 * The data in POSIX lock records is an unsorted linear array of these
 * records.  It is unnecessary to store the count as tdb provides the
 * size of the record.
 */

struct posix_lock {
        int fd;             /* fd of the open file the lock was requested on. */
        SMB_OFF_T start;    /* Start offset of the locked range. */
        SMB_OFF_T size;     /* Length of the locked range. */
        int lock_type;      /* Lock type; F_RDLCK is reported as "READ", anything else as "WRITE". */
};
  54
  55 /*
  56  * The data in POSIX pending close records is an unsorted linear array of int
  57  * records.  It is unnecessary to store the count as tdb provides the
  58  * size of the record.
  59  */
  60
/*
 * The key used in both the POSIX databases. locking_key() hands the raw
 * bytes of this struct to tdb, so there is one record per dev/inode pair.
 */

struct posix_lock_key {
        SMB_DEV_T device;   /* Device number of the file. */
        SMB_INO_T inode;    /* Inode number of the file. */
};
  67
  68 /*******************************************************************
  69  Form a static locking key for a dev/inode pair.
  70 ******************************************************************/
  71
  72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
  73 {
  74         static struct posix_lock_key key;
  75         TDB_DATA kbuf;
  76         key.device = dev;
  77         key.inode = inode;
  78         kbuf.dptr = (char *)&key;
  79         kbuf.dsize = sizeof(key);
  80         return kbuf;
  81 }
  82
  83 /*******************************************************************
  84  Convenience function to get a key from an fsp.
  85 ******************************************************************/
  86
  87 static TDB_DATA locking_key_fsp(files_struct *fsp)
  88 {
  89         return locking_key(fsp->dev, fsp->inode);
  90 }
  91
  92 /****************************************************************************
  93  Add an fd to the pending close tdb.
  94 ****************************************************************************/
  95
  96 static BOOL add_fd_to_close_entry(files_struct *fsp)
  97 {
  98         TDB_DATA kbuf = locking_key_fsp(fsp);
  99         TDB_DATA dbuf;
 100
 101         dbuf.dptr = NULL;
 102
 103         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 104
 105         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
 106         if (!dbuf.dptr) {
 107                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
 108                 return False;
 109         }
 110         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
 111         dbuf.dsize += sizeof(int);
 112
 113         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 114                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
 115         }
 116
 117         free(dbuf.dptr);
 118         return True;
 119 }
 120
 121 /****************************************************************************
 122  Remove all fd entries for a specific dev/inode pair from the tdb.
 123 ****************************************************************************/
 124
 125 static void delete_close_entries(files_struct *fsp)
 126 {
 127         TDB_DATA kbuf = locking_key_fsp(fsp);
 128
 129         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
 130                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
 131 }
 132
 133 /****************************************************************************
 134  Get the array of POSIX pending close records for an open fsp. Caller must
 135  free. Returns number of entries.
 136 ****************************************************************************/
 137
 138 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
 139 {
 140         TDB_DATA kbuf = locking_key_fsp(fsp);
 141         TDB_DATA dbuf;
 142         size_t count = 0;
 143
 144         *entries = NULL;
 145         dbuf.dptr = NULL;
 146
 147         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
 148
 149     if (!dbuf.dptr) {
 150                 return 0;
 151         }
 152
 153         *entries = (int *)dbuf.dptr;
 154         count = (size_t)(dbuf.dsize / sizeof(int));
 155
 156         return count;
 157 }
 158
 159 /****************************************************************************
 160  Get the array of POSIX locks for an fsp. Caller must free. Returns
 161  number of entries.
 162 ****************************************************************************/
 163
 164 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
 165 {
 166         TDB_DATA kbuf = locking_key_fsp(fsp);
 167         TDB_DATA dbuf;
 168         size_t count = 0;
 169
 170         *entries = NULL;
 171
 172         dbuf.dptr = NULL;
 173
 174         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 175
 176     if (!dbuf.dptr) {
 177                 return 0;
 178         }
 179
 180         *entries = (struct posix_lock *)dbuf.dptr;
 181         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
 182
 183         return count;
 184 }
 185
 186 /****************************************************************************
 187  Deal with pending closes needed by POSIX locking support.
 188  Note that posix_locking_close_file() is expected to have been called
 189  to delete all locks on this fsp before this function is called.
 190 ****************************************************************************/
 191
 192 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
 193 {
 194         int saved_errno = 0;
 195         int ret;
 196         size_t count, i;
 197         struct posix_lock *entries = NULL;
 198         int *fd_array = NULL;
 199         BOOL locks_on_other_fds = False;
 200
 201         if (!lp_posix_locking(SNUM(conn))) {
 202                 /*
 203                  * No POSIX to worry about, just close.
 204                  */
 205                 ret = conn->vfs_ops.close(fsp->fd);
 206                 fsp->fd = -1;
 207                 return ret;
 208         }
 209
 210         /*
 211          * Get the number of outstanding POSIX locks on this dev/inode pair.
 212          */
 213
 214         count = get_posix_lock_entries(fsp, &entries);
 215
 216         /*
 217          * Check if there are any outstanding locks belonging to
 218          * other fd's. This should never be the case if posix_locking_close_file()
 219          * has been called first, but it never hurts to be *sure*.
 220          */
 221
 222         for (i = 0; i < count; i++) {
 223                 if (entries[i].fd != fsp->fd) {
 224                         locks_on_other_fds = True;
 225                         break;
 226                 }
 227         }
 228
 229         if (locks_on_other_fds) {
 230
 231                 /*
 232                  * There are outstanding locks on this dev/inode pair on other fds.
 233                  * Add our fd to the pending close tdb and set fsp->fd to -1.
 234                  */
 235
 236                 if (!add_fd_to_close_entry(fsp)) {
 237                         free((char *)entries);
 238                         return False;
 239                 }
 240
 241                 free((char *)entries);
 242                 fsp->fd = -1;
 243                 return 0;
 244         }
 245
 246         if(entries)
 247                 free((char *)entries);
 248
 249         /*
 250          * No outstanding POSIX locks. Get the pending close fd's
 251          * from the tdb and close them all.
 252          */
 253
 254         count = get_posix_pending_close_entries(fsp, &fd_array);
 255
 256         if (count) {
 257                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
 258
 259                 for(i = 0; i < count; i++) {
 260                         if (conn->vfs_ops.close(fd_array[i]) == -1) {
 261                                 saved_errno = errno;
 262                         }
 263                 }
 264
 265                 /*
 266                  * Delete all fd's stored in the tdb
 267                  * for this dev/inode pair.
 268                  */
 269
 270                 delete_close_entries(fsp);
 271         }
 272
 273         if (fd_array)
 274                 free((char *)fd_array);
 275
 276         /*
 277          * Finally close the fd associated with this fsp.
 278          */
 279
 280         ret = conn->vfs_ops.close(fsp->fd);
 281
 282         if (saved_errno != 0) {
 283         errno = saved_errno;
 284                 ret = -1;
 285     }
 286
 287         fsp->fd = -1;
 288
 289         return ret;
 290 }
 291
 292 /****************************************************************************
 293  Debugging aid :-).
 294 ****************************************************************************/
 295
 296 static const char *posix_lock_type_name(int lock_type)
 297 {
 298         return (lock_type == F_RDLCK) ? "READ" : "WRITE";
 299 }
 300
 301 /****************************************************************************
 302  Add an entry into the POSIX locking tdb. Returns the number of records that
 303  match the given start and size, or -1 on error.
 304 ****************************************************************************/
 305
 306 static int add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type)
 307 {
 308         TDB_DATA kbuf = locking_key_fsp(fsp);
 309         TDB_DATA dbuf;
 310         struct posix_lock pl;
 311         struct posix_lock *entries;
 312         size_t i, count;
 313         int num_records = 0;
 314
 315         /*
 316          * Windows is very strange. It allows read locks to be overlayed on
 317          * a write lock, but leaves the write lock in force until the first
 318          * unlock. It also reference counts the locks. This means the following sequence :
 319          *
 320          * process1                                      process2
 321          * ------------------------------------------------------------------------
 322          * WRITE LOCK : start = 0, len = 10
 323          *                                            READ LOCK: start =0, len = 10 - FAIL
 324          * READ LOCK : start = 0, len = 10
 325          *                                            READ LOCK: start =0, len = 10 - FAIL
 326          * UNLOCK : start = 0, len = 10
 327          *                                            READ LOCK: start =0, len = 10 - OK
 328          *
 329          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
 330          * would leave a single read lock over the 0-10 region. In order to
 331          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
 332          * entries, one for each overlayed lock request. We are guarenteed by the brlock
 333          * semantics that if a write lock is added, then it will be first in the array.
 334          */
 335
 336         dbuf.dptr = NULL;
 337
 338         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 339
 340         /*
 341          * New record.
 342          */
 343
 344         pl.fd = fsp->fd;
 345         pl.start = start;
 346         pl.size = size;
 347         pl.lock_type = lock_type;
 348
 349         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
 350         if (!dbuf.dptr) {
 351                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
 352                 goto fail;
 353         }
 354
 355         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
 356         dbuf.dsize += sizeof(pl);
 357
 358         count = (size_t)(dbuf.dsize / sizeof(pl));
 359         entries = (struct posix_lock *)dbuf.dptr;
 360
 361         for (i = 0; i < count; i++) {
 362                 struct posix_lock *entry = &entries[i];
 363
 364                 if (fsp->fd == entry->fd &&
 365                         start == entry->start &&
 366                         size == entry->size)
 367                         num_records++;
 368
 369         }
 370
 371         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
 372                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
 373                 goto fail;
 374         }
 375
 376     free(dbuf.dptr);
 377
 378         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: num_records = %d : dev=%.0f inode=%.0f\n",
 379                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size, num_records,
 380                         (double)fsp->dev, (double)fsp->inode ));
 381
 382     return num_records;
 383
 384  fail:
 385     if (dbuf.dptr)
 386                 free(dbuf.dptr);
 387     return -1;
 388 }
 389
 390 /****************************************************************************
 391  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
 392  deleted and the number of remaining matching records, or -1 on error.
 393 ****************************************************************************/
 394
 395 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
 396 {
 397         TDB_DATA kbuf = locking_key_fsp(fsp);
 398         TDB_DATA dbuf;
 399         struct posix_lock *locks;
 400         size_t i, count;
 401         int num_records = 0;
 402
 403         dbuf.dptr = NULL;
 404
 405         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 406
 407         if (!dbuf.dptr) {
 408                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
 409                 goto fail;
 410         }
 411
 412         /* There are existing locks - find a match. */
 413         locks = (struct posix_lock *)dbuf.dptr;
 414         count = (size_t)(dbuf.dsize / sizeof(*locks));
 415
 416         /*
 417          * Count the number of entries that match this
 418          * unlock request.
 419          */
 420
 421         for (i = 0; i < count; i++) {
 422                 struct posix_lock *entry = &locks[i];
 423
 424                 if (entry->fd == fsp->fd &&
 425                         entry->start == start &&
 426                         entry->size == size) {
 427                                 num_records++;
 428                 }
 429         }
 430
 431         for (i=0; i<count; i++) {
 432                 struct posix_lock *entry = &locks[i];
 433
 434                 if (entry->fd == fsp->fd &&
 435                         entry->start == start &&
 436                         entry->size == size) {
 437
 438                         num_records--; /* We're deleting one. */
 439
 440                         /* Make a copy if requested. */
 441                         if (pl)
 442                                 *pl = *entry;
 443
 444                         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
 445                                         posix_lock_type_name(entry->lock_type), (double)entry->start, (double)entry->size,
 446                                         (unsigned int)num_records ));
 447
 448                         /* Found it - delete it. */
 449                         if (count == 1) {
 450                                 tdb_delete(posix_lock_tdb, kbuf);
 451                         } else {
 452                                 if (i < count-1) {
 453                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
 454                                 }
 455                                 dbuf.dsize -= sizeof(*locks);
 456                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
 457                         }
 458
 459                         free(dbuf.dptr);
 460                         return num_records;
 461                 }
 462         }
 463
 464         /* We didn't find it. */
 465
 466  fail:
 467     if (dbuf.dptr)
 468                 free(dbuf.dptr);
 469     return -1;
 470 }
 471
 472 /****************************************************************************
 473  Utility function to map a lock type correctly depending on the open
 474  mode of a file.
 475 ****************************************************************************/
 476
 477 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
 478 {
 479         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
 480                 /*
 481                  * Many UNIX's cannot get a write lock on a file opened read-only.
 482                  * Win32 locking semantics allow this.
 483                  * Do the best we can and attempt a read-only lock.
 484                  */
 485                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
 486                 return F_RDLCK;
 487         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
 488                 /*
 489                  * Ditto for read locks on write only files.
 490                  */
 491                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
 492                 return F_WRLCK;
 493         }
 494
 495   /*
 496    * This return should be the most normal, as we attempt
 497    * to always open files read/write.
 498    */
 499
 500   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
 501 }
 502
 503 /****************************************************************************
 504  Check to see if the given unsigned lock range is within the possible POSIX
 505  range. Modifies the given args to be in range if possible, just returns
 506  False if not.
 507 ****************************************************************************/
 508
 509 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
 510                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
 511 {
 512         SMB_OFF_T offset;
 513         SMB_OFF_T count;
 514
 515 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
 516
 517     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
 518     SMB_OFF_T mask = (mask2<<1);
 519     SMB_OFF_T neg_mask = ~mask;
 520
 521         /*
 522          * In this case SMB_OFF_T is 64 bits,
 523          * and the underlying system can handle 64 bit signed locks.
 524          * Cast to signed type.
 525          */
 526
 527         offset = (SMB_OFF_T)u_offset;
 528         count = (SMB_OFF_T)u_count;
 529
 530         /*
 531          * Deal with a very common case of count of all ones.
 532          * (lock entire file).
 533          */
 534
 535         if(count == (SMB_OFF_T)-1)
 536                 count &= ~mask;
 537
 538         /*
 539          * POSIX lock ranges cannot be negative.
 540          * Fail if any combination becomes negative.
 541          */
 542
 543         if(offset < 0 || count < 0 || (offset + count < 0)) {
 544                 DEBUG(10,("posix_lock_in_range: negative range: offset = %.0f, count = %.0f. Ignoring lock.\n",
 545                                 (double)offset, (double)count ));
 546                 return False;
 547         }
 548
 549         /*
 550          * In this case SMB_OFF_T is 64 bits, the offset and count
 551          * fit within the positive range, and the underlying
 552          * system can handle 64 bit locks. Just return as the
 553          * cast values are ok.
 554          */
 555
 556 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 557
 558         /*
 559          * In this case either SMB_OFF_T is 32 bits,
 560          * or the underlying system cannot handle 64 bit signed locks.
 561          * Either way we have to try and mangle to fit within 31 bits.
 562          * This is difficult.
 563          */
 564
 565 #if defined(HAVE_BROKEN_FCNTL64_LOCKS)
 566
 567         /*
 568          * SMB_OFF_T is 64 bits, but we need to use 31 bits due to
 569          * broken large locking.
 570          */
 571
 572         /*
 573          * Deal with a very common case of count of all ones.
 574          * (lock entire file).
 575          */
 576
 577         if(u_count == (SMB_BIG_UINT)-1)
 578                 count = 0x7FFFFFFF;
 579
 580         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
 581                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. offset = %.0f, count = %.0f. Ignoring lock.\n",
 582                                 (double)u_offset, (double)u_count ));
 583                 /* Top 32 bits of offset or count were not zero. */
 584                 return False;
 585         }
 586
 587         /* Cast from 64 bits unsigned to 64 bits signed. */
 588         offset = (SMB_OFF_T)u_offset;
 589         count = (SMB_OFF_T)u_count;
 590
 591         /*
 592          * Check if we are within the 2^31 range.
 593          */
 594
 595         {
 596                 int32 low_offset = (int32)offset;
 597                 int32 low_count = (int32)count;
 598
 599                 if(low_offset < 0 || low_count < 0 || (low_offset + low_count < 0)) {
 600                         DEBUG(10,("posix_lock_in_range: not within 2^31 range. low_offset = %d, low_count = %d. Ignoring lock.\n",
 601                                         low_offset, low_count ));
 602                         return False;
 603                 }
 604         }
 605
 606         /*
 607          * Ok - we can map from a 64 bit number to a 31 bit lock.
 608          */
 609
 610 #else /* HAVE_BROKEN_FCNTL64_LOCKS */
 611
 612         /*
 613          * SMB_OFF_T is 32 bits.
 614          */
 615
 616 #if defined(HAVE_LONGLONG)
 617
 618         /*
 619          * SMB_BIG_UINT is 64 bits, we can do a 32 bit shift.
 620          */
 621
 622         /*
 623          * Deal with a very common case of count of all ones.
 624          * (lock entire file).
 625          */
 626
 627         if(u_count == (SMB_BIG_UINT)-1)
 628                 count = 0x7FFFFFFF;
 629
 630         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
 631                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. u_offset = %.0f, u_count = %.0f. Ignoring lock.\n",
 632                                 (double)u_offset, (double)u_count ));
 633                 return False;
 634         }
 635
 636         /* Cast from 64 bits unsigned to 32 bits signed. */
 637         offset = (SMB_OFF_T)u_offset;
 638         count = (SMB_OFF_T)u_count;
 639
 640         /*
 641          * Check if we are within the 2^31 range.
 642          */
 643
 644         if(offset < 0 || count < 0 || (offset + count < 0)) {
 645                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
 646                                 (int)offset, (int)count ));
 647                 return False;
 648         }
 649
 650 #else /* HAVE_LONGLONG */
 651
 652         /*
 653          * SMB_BIG_UINT and SMB_OFF_T are both 32 bits,
 654          * just cast.
 655          */
 656
 657         /*
 658          * Deal with a very common case of count of all ones.
 659          * (lock entire file).
 660          */
 661
 662         if(u_count == (SMB_BIG_UINT)-1)
 663                 count = 0x7FFFFFFF;
 664
 665         /* Cast from 32 bits unsigned to 32 bits signed. */
 666         offset = (SMB_OFF_T)u_offset;
 667         count = (SMB_OFF_T)u_count;
 668
 669         /*
 670          * Check if we are within the 2^31 range.
 671          */
 672
 673         if(offset < 0 || count < 0 || (offset + count < 0)) {
 674                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
 675                                 (int)offset, (int)count ));
 676                 return False;
 677         }
 678
 679 #endif /* HAVE_LONGLONG */
 680 #endif /* LARGE_SMB_OFF_T */
 681 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
 682
 683         /*
 684          * The mapping was successful.
 685          */
 686
 687         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
 688                         (double)offset, (double)count ));
 689
 690         *offset_out = offset;
 691         *count_out = count;
 692
 693         return True;
 694 }
 695
 696 #if defined(LARGE_SMB_OFF_T)
 697 /****************************************************************************
 698  Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).
 699 ****************************************************************************/
 700
 701 static uint32 map_lock_offset(uint32 high, uint32 low)
 702 {
 703         unsigned int i;
 704         uint32 mask = 0;
 705         uint32 highcopy = high;
 706
 707         /*
 708          * Try and find out how many significant bits there are in high.
 709          */
 710
 711         for(i = 0; highcopy; i++)
 712                 highcopy >>= 1;
 713
 714         /*
 715          * We use 31 bits not 32 here as POSIX
 716          * lock offsets may not be negative.
 717          */
 718
 719         mask = (~0) << (31 - i);
 720
 721         if(low & mask)
 722                 return 0; /* Fail. */
 723
 724         high <<= (31 - i);
 725
 726         return (high|low);
 727 }
 728 #endif
 729
 730 /****************************************************************************
 731  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
 732  broken NFS implementations.
 733 ****************************************************************************/
 734
 735 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
 736 {
 737         int ret;
 738         struct connection_struct *conn = fsp->conn;
 739
 740 #if defined(LARGE_SMB_OFF_T)
 741         /*
 742          * In the 64 bit locking case we store the original
 743          * values in case we have to map to a 32 bit lock on
 744          * a filesystem that doesn't support 64 bit locks.
 745          */
 746         SMB_OFF_T orig_offset = offset;
 747         SMB_OFF_T orig_count = count;
 748 #endif /* LARGE_SMB_OFF_T */
 749
 750         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
 751
 752         ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
 753
 754         if (!ret && (errno == EFBIG)) {
 755                 if( DEBUGLVL( 0 )) {
 756                         dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
 757                         dbgtext("a 'file too large' error. This can happen when using 64 bit lock offsets\n");
 758                         dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
 759                 }
 760                 /* 32 bit NFS file system, retry with smaller offset */
 761                 errno = 0;
 762                 count &= 0x7fffffff;
 763                 ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
 764         }
 765
 766         /* A lock query - just return. */
 767         if (op == SMB_F_GETLK)
 768                 return ret;
 769
 770         /* A lock set or unset. */
 771         if (!ret) {
 772                 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
 773                                 (double)offset,(double)count,op,type,strerror(errno)));
 774
 775                 /* Perhaps it doesn't support this sort of locking ? */
 776                 if (errno == EINVAL) {
 777 #if defined(LARGE_SMB_OFF_T)
 778                         {
 779                                 /*
 780                                  * Ok - if we get here then we have a 64 bit lock request
 781                                  * that has returned EINVAL. Try and map to 31 bits for offset
 782                                  * and length and try again. This may happen if a filesystem
 783                                  * doesn't support 64 bit offsets (efs/ufs) although the underlying
 784                                  * OS does.
 785                                  */
 786                                 uint32 off_low = (orig_offset & 0xFFFFFFFF);
 787                                 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
 788
 789                                 count = (orig_count & 0x7FFFFFFF);
 790                                 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
 791                                 ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
 792                                 if (!ret) {
 793                                         if (errno == EINVAL) {
 794                                                 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
 795                                                 return(True);
 796                                         }
 797                                         return False;
 798                                 }
 799                                 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
 800                                 return True;
 801                         }
 802 #else /* LARGE_SMB_OFF_T */
 803                         DEBUG(3,("locking not supported? returning True\n"));
 804                         return(True);
 805 #endif /* LARGE_SMB_OFF_T */
 806                 }
 807
 808                 return(False);
 809         }
 810
 811         DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
 812
 813         return(True);
 814 }
 815
 816 /****************************************************************************
 817  POSIX function to see if a file region is locked. Returns True if the
 818  region is locked, False otherwise.
 819 ****************************************************************************/
 820
 821 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 822 {
 823         SMB_OFF_T offset;
 824         SMB_OFF_T count;
 825         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 826
 827         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
 828                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 829
 830         /*
 831          * If the requested lock won't fit in the POSIX range, we will
 832          * never set it, so presume it is not locked.
 833          */
 834
 835         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 836                 return False;
 837
 838         /*
 839          * Note that most UNIX's can *test* for a write lock on
 840          * a read-only fd, just not *set* a write lock on a read-only
 841          * fd. So we don't need to use map_lock_type here.
 842          */
 843
 844         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
 845 }
 846
 847 /****************************************************************************
 848  POSIX function to acquire a lock. Returns True if the
 849  lock could be granted, False if not.
 850 ****************************************************************************/
 851
 852 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
 853 {
 854         SMB_OFF_T offset;
 855         SMB_OFF_T count;
 856         BOOL ret = True;
 857         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
 858         int ref_count;
 859
 860         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
 861                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
 862
 863         /*
 864          * If the requested lock won't fit in the POSIX range, we will
 865          * pretend it was successful.
 866          */
 867
 868         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
 869                 return True;
 870
 871         /*
 872          * Note that setting multiple overlapping locks on different
 873          * file descriptors will not be held separately by the kernel (POSIX
 874          * braindamage), but will be merged into one continuous lock
 875          * range. We cope with this case in the release_posix_lock code
 876          * below. We need to add the posix lock entry into the tdb before
 877          * doing the real posix lock call to deal with the locking overlay
 878          * case described above in add_posix_lock_entry().
 879          */
 880
 881         ref_count = add_posix_lock_entry(fsp,offset,count,posix_lock_type);
 882
 883         if (ref_count == 1) {
 884                 /*
 885                  * First lock entry created. Do a real POSIX lock.
 886                  */
 887             ret = posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type);
 888
 889                 /*
 890                  * Oops, POSIX lock failed, delete the tdb entry.
 891                  */
 892                 if (!ret)
 893                         delete_posix_lock_entry(fsp,offset,count,NULL);
 894         }
 895
 896         return ret;
 897 }
 898
 899 /*
 900  * Structure used when splitting a lock range
 901  * into a POSIX lock range. Doubly linked list.
 902  */
 903
 904 struct unlock_list {
 905     struct unlock_list *next;
 906     struct unlock_list *prev;
 907     SMB_OFF_T start;
 908     SMB_OFF_T size;
 909 };
 910
 911 /****************************************************************************
 912  Create a list of lock ranges that don't overlap a given range. Used in calculating
 913  POSIX lock unlocks. This is a difficult function that requires ASCII art to
 914  understand it :-).
 915 ****************************************************************************/
 916
 917 static struct unlock_list *posix_unlock_list(TALLOC_CTX *ctx, struct unlock_list *ulhead, files_struct *fsp)
 918 {
 919         TDB_DATA kbuf = locking_key_fsp(fsp);
 920         TDB_DATA dbuf;
 921         struct posix_lock *locks;
 922         size_t num_locks, i;
 923
 924         dbuf.dptr = NULL;
 925
 926         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
 927
 928         if (!dbuf.dptr) {
 929                 return ulhead;
 930         }
 931
 932         locks = (struct posix_lock *)dbuf.dptr;
 933         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
 934
 935         /*
 936          * Check the current lock list on this dev/inode pair.
 937          * Quit if the list is deleted.
 938          */
 939
 940         DEBUG(10,("posix_unlock_list: curr: start=%.0f,size=%.0f\n",
 941                 (double)ulhead->start, (double)ulhead->size ));
 942
 943         for (i=0; i<num_locks && ulhead; i++) {
 944
 945                 struct posix_lock *lock = &locks[i];
 946                 struct unlock_list *ul_curr;
 947
 948                 /*
 949                  * Walk the unlock list, checking for overlaps. Note that
 950                  * the unlock list can expand within this loop if the current
 951                  * range being examined needs to be split.
 952                  */
 953
 954                 for (ul_curr = ulhead; ul_curr;) {
 955
 956                         DEBUG(10,("posix_unlock_list: lock: start=%.0f,size=%.0f:",
 957                                 (double)lock->start, (double)lock->size ));
 958
 959                         if ( (ul_curr->start >= (lock->start + lock->size)) ||
 960                                  (lock->start >= (ul_curr->start + ul_curr->size))) {
 961
 962                                 /* No overlap with this lock - leave this range alone. */
 963 /*********************************************
 964                                              +---------+
 965                                              | ul_curr |
 966                                              +---------+
 967                                 +-------+
 968                                 | lock  |
 969                                 +-------+
 970 OR....
 971              +---------+
 972              | ul_curr |
 973              +---------+
 974 **********************************************/
 975
 976                                 DEBUG(10,("no overlap case.\n" ));
 977
 978                                 ul_curr = ul_curr->next;
 979
 980                         } else if ( (ul_curr->start >= lock->start) &&
 981                                                 (ul_curr->start + ul_curr->size <= lock->start + lock->size) ) {
 982
 983                                 /*
 984                                  * This unlock is completely overlapped by this existing lock range
 985                                  * and thus should have no effect (not be unlocked). Delete it from the list.
 986                                  */
 987 /*********************************************
 988                 +---------+
 989                 | ul_curr |
 990                 +---------+
 991         +---------------------------+
 992         |       lock                |
 993         +---------------------------+
 994 **********************************************/
 995                                 /* Save the next pointer */
 996                                 struct unlock_list *ul_next = ul_curr->next;
 997
 998                                 DEBUG(10,("delete case.\n" ));
 999
1000                                 DLIST_REMOVE(ulhead, ul_curr);
1001                                 if(ulhead == NULL)
1002                                         break; /* No more list... */
1003
1004                                 ul_curr = ul_next;
1005
1006                         } else if ( (ul_curr->start >= lock->start) &&
1007                                                 (ul_curr->start < lock->start + lock->size) &&
1008                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
1009
1010                                 /*
1011                                  * This unlock overlaps the existing lock range at the high end.
1012                                  * Truncate by moving start to existing range end and reducing size.
1013                                  */
1014 /*********************************************
1015                 +---------------+
1016                 | ul_curr       |
1017                 +---------------+
1018         +---------------+
1019         |    lock       |
1020         +---------------+
1021 BECOMES....
1022                         +-------+
1023                         |ul_curr|
1024                         +-------+
1025 **********************************************/
1026
1027                                 ul_curr->size = (ul_curr->start + ul_curr->size) - (lock->start + lock->size);
1028                                 ul_curr->start = lock->start + lock->size;
1029
1030                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
1031                                                                 (double)ul_curr->start, (double)ul_curr->size ));
1032
1033                                 ul_curr = ul_curr->next;
1034
1035                         } else if ( (ul_curr->start < lock->start) &&
1036                                                 (ul_curr->start + ul_curr->size > lock->start) ) {
1037
1038                                 /*
1039                                  * This unlock overlaps the existing lock range at the low end.
1040                                  * Truncate by reducing size.
1041                                  */
1042 /*********************************************
1043    +---------------+
1044    | ul_curr       |
1045    +---------------+
1046            +---------------+
1047            |    lock       |
1048            +---------------+
1049 BECOMES....
1050    +-------+
1051    |ul_curr|
1052    +-------+
1053 **********************************************/
1054
1055                                 ul_curr->size = lock->start - ul_curr->start;
1056
1057                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
1058                                                                 (double)ul_curr->start, (double)ul_curr->size ));
1059
1060                                 ul_curr = ul_curr->next;
1061
1062                         } else if ( (ul_curr->start < lock->start) &&
1063                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
1064                                 /*
1065                                  * Worst case scenario. Unlock request completely overlaps an existing
1066                                  * lock range. Split the request into two, push the new (upper) request
1067                                  * into the dlink list, and continue with the entry after ul_new (as we
1068                                  * know that ul_new will not overlap with this lock).
1069                                  */
1070 /*********************************************
1071         +---------------------------+
1072         |       ul_curr             |
1073         +---------------------------+
1074                 +---------+
1075                 | lock    |
1076                 +---------+
1077 BECOMES.....
1078         +-------+         +---------+
1079         |ul_curr|         |ul_new   |
1080         +-------+         +---------+
1081 **********************************************/
1082                                 struct unlock_list *ul_new = (struct unlock_list *)talloc(ctx,
1083                                                                                                         sizeof(struct unlock_list));
1084
1085                                 if(ul_new == NULL) {
1086                                         DEBUG(0,("posix_unlock_list: talloc fail.\n"));
1087                                         return NULL; /* The talloc_destroy takes care of cleanup. */
1088                                 }
1089
1090                                 ZERO_STRUCTP(ul_new);
1091                                 ul_new->start = lock->start + lock->size;
1092                                 ul_new->size = ul_curr->start + ul_curr->size - ul_new->start;
1093
1094                                 /* Add into the dlink list after the ul_curr point - NOT at ulhead. */
1095                                 DLIST_ADD(ul_curr, ul_new);
1096
1097                                 /* Truncate the ul_curr. */
1098                                 ul_curr->size = lock->start - ul_curr->start;
1099
1100                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
1101 new: start=%.0f,size=%.0f\n", (double)ul_curr->start, (double)ul_curr->size,
1102                                                                 (double)ul_new->start, (double)ul_new->size ));
1103
1104                                 ul_curr = ul_new->next;
1105
1106                         } else {
1107
1108                                 /*
1109                                  * This logic case should never happen. Ensure this is the
1110                                  * case by forcing an abort.... Remove in production.
1111                                  */
1112                                 pstring msg;
1113
1114                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: ul_curr: start = %.0f, size = %.0f : \
1115 lock: start = %.0f, size = %.0f\n", (double)ul_curr->start, (double)ul_curr->size, (double)lock->start, (double)lock->size );
1116
1117                                 smb_panic(msg);
1118                         }
1119                 } /* end for ( ul_curr = ulhead; ul_curr;) */
1120         } /* end for (i=0; i<num_locks && ul_head; i++) */
1121
1122         if (dbuf.dptr)
1123                 free(dbuf.dptr);
1124
1125         return ulhead;
1126 }
1127
1128 /****************************************************************************
1129  POSIX function to release a lock. Returns True if the
1130  lock could be released, False if not.
1131 ****************************************************************************/
1132
1133 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1134 {
1135         SMB_OFF_T offset;
1136         SMB_OFF_T count;
1137         BOOL ret = True;
1138         TALLOC_CTX *ul_ctx = NULL;
1139         struct unlock_list *ulist = NULL;
1140         struct unlock_list *ul = NULL;
1141         struct posix_lock deleted_lock;
1142         int num_entries;
1143
1144         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1145                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1146
1147         /*
1148          * If the requested lock won't fit in the POSIX range, we will
1149          * pretend it was successful.
1150          */
1151
1152         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1153                 return True;
1154
1155         /*
1156          * We treat this as one unlock request for POSIX accounting purposes even
1157          * if it may have been split into multiple smaller POSIX unlock ranges.
1158          */
1159
1160         num_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1161
1162         if (num_entries == -1) {
1163         smb_panic("release_posix_lock: unable find entry to delete !\n");
1164         }
1165
1166         /*
1167          * If num_entries is > 0, and the lock_type we just deleted from the tdb was
1168          * a POSIX write lock, then rather than doing an unlock we need to downgrade
1169          * the POSIX lock to a read lock.
1170          */
1171
1172         if (num_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1173                 return posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK);
1174         }
1175
1176         /*
1177          * Only do the POSIX unlock when the num_entries is now zero.
1178          */
1179
1180         if (num_entries > 0) {
1181                 DEBUG(10, ("release_posix_lock: num_entries = %d\n", num_entries ));
1182                 return True;
1183         }
1184
1185         if ((ul_ctx = talloc_init()) == NULL) {
1186         DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1187                 return True; /* Not a fatal error. */
1188         }
1189
1190         if ((ul = (struct unlock_list *)talloc(ul_ctx, sizeof(struct unlock_list))) == NULL) {
1191                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1192                 talloc_destroy(ul_ctx);
1193                 return True; /* Not a fatal error. */
1194         }
1195
1196         /*
1197          * Create the initial list entry containing the
1198          * lock we want to remove.
1199          */
1200
1201         ZERO_STRUCTP(ul);
1202         ul->start = offset;
1203         ul->size = count;
1204
1205         DLIST_ADD(ulist, ul);
1206
1207         /*
1208          * The following call calculates if there are any
1209          * overlapping locks held by this process on
1210          * fd's open on the same file and creates a
1211          * list of unlock ranges that will allow
1212          * POSIX lock ranges to remain on the file whilst the
1213          * unlocks are performed.
1214          */
1215
1216         ulist = posix_unlock_list(ul_ctx, ulist, fsp);
1217
1218         /*
1219          * Release the POSIX locks on the list of ranges returned.
1220          */
1221
1222         for(; ulist; ulist = ulist->next) {
1223                 offset = ulist->start;
1224                 count = ulist->size;
1225
1226                 if(u_count == 0) {
1227
1228                         /*
1229                          * This lock must overlap with an existing lock.
1230                          * Don't do any POSIX call.
1231                          */
1232
1233                         continue;
1234                 }
1235
1236                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1237                         (double)offset, (double)count ));
1238
1239                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1240                         ret = False;
1241         }
1242
1243         talloc_destroy(ul_ctx);
1244
1245         return ret;
1246 }
1247
1248 /****************************************************************************
1249  Remove all lock entries for a specific dev/inode pair from the tdb.
1250 ****************************************************************************/
1251
1252 static void delete_posix_lock_entries(files_struct *fsp)
1253 {
1254         TDB_DATA kbuf = locking_key_fsp(fsp);
1255
1256         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1257                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1258 }
1259
1260 /****************************************************************************
1261  Debug function.
1262 ****************************************************************************/
1263
1264 static void dump_entry(struct posix_lock *pl)
1265 {
1266         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1267                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1268 }
1269
1270 /****************************************************************************
1271  Remove any locks on this fd. Called from file_close().
1272 ****************************************************************************/
1273
1274 void posix_locking_close_file(files_struct *fsp)
1275 {
1276         struct posix_lock *entries = NULL;
1277         size_t count, i;
1278
1279         /*
1280          * Optimization for the common case where we are the only
1281          * opener of a file. If all fd entries are our own, we don't
1282          * need to explicitly release all the locks via the POSIX functions,
1283          * we can just remove all the entries in the tdb and allow the
1284          * close to remove the real locks.
1285          */
1286
1287         count = get_posix_lock_entries(fsp, &entries);
1288
1289         if (count == 0) {
1290                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1291                 return;
1292         }
1293
1294         for (i = 0; i < count; i++) {
1295                 if (entries[i].fd != fsp->fd )
1296                         break;
1297
1298                 dump_entry(&entries[i]);
1299         }
1300
1301         if (i == count) {
1302                 /* All locks are ours. */
1303                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1304                         fsp->fsp_name, (unsigned int)count ));
1305                 free((char *)entries);
1306                 delete_posix_lock_entries(fsp);
1307                 return;
1308         }
1309
1310         /*
1311          * Difficult case. We need to delete all our locks, whilst leaving
1312          * all other POSIX locks in place.
1313          */
1314
1315         for (i = 0; i < count; i++) {
1316                 struct posix_lock *pl = &entries[i];
1317                 if (pl->fd == fsp->fd)
1318                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1319         }
1320         free((char *)entries);
1321 }
1322
1323 /*******************************************************************
1324  Create the in-memory POSIX lock databases.
1325 ********************************************************************/
1326
1327 BOOL posix_locking_init(void)
1328 {
1329         if (posix_lock_tdb && posix_pending_close_tdb)
1330                 return True;
1331
1332         if (!posix_lock_tdb)
1333                 posix_lock_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1334                                           O_RDWR|O_CREAT, 0644);
1335     if (!posix_lock_tdb) {
1336         DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1337                 return False;
1338     }
1339         if (!posix_pending_close_tdb)
1340                 posix_pending_close_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1341                     O_RDWR|O_CREAT, 0644);
1342     if (!posix_pending_close_tdb) {
1343         DEBUG(0,("Failed to open POSIX pending close database.\n"));
1344                 return False;
1345     }
1346
1347         return True;
1348 }
1349
1350 /*******************************************************************
1351  Delete the in-memory POSIX lock databases.
1352 ********************************************************************/
1353
1354 BOOL posix_locking_end(void)
1355 {
1356     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1357                 return False;
1358     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1359                 return False;
1360         return True;
1361 }