source4/ntvfs/common/brlock.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3
   4    generic byte range locking code
   5
   6    Copyright (C) Andrew Tridgell 1992-2004
   7    Copyright (C) Jeremy Allison 1992-2000
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 /* This module implements a tdb based byte range locking service,
  25    replacing the fcntl() based byte range locking previously
  26    used. This allows us to provide the same semantics as NT */
  27
  28 #include "includes.h"
  29 #include "system/filesys.h"
  30 #include "lib/tdb/include/tdb.h"
  31 #include "messaging/messaging.h"
  32 #include "db_wrap.h"
  33 #include "lib/messaging/irpc.h"
  34 #include "libcli/libcli.h"
  35
  36 /*
  37   in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  38   a file. For a local posix filesystem this will usually be a combination
  39   of the device and inode numbers of the file, but it can be anything
   40   that uniquely identifies a file for locking purposes, as long
  41   as it is applied consistently.
  42 */
  43
  44 /*
  45   the lock context contains the elements that define whether one
  46   lock is the same as another lock
  47 */
  48 struct lock_context {
  49         uint32_t server;
  50         uint16_t smbpid;
  51         int snum;
  52 };
  53
  54 /* The data in brlock records is an unsorted linear array of these
  55    records.  It is unnecessary to store the count as tdb provides the
  56    size of the record */
  57 struct lock_struct {
  58         struct lock_context context;
  59         uint64_t start;
  60         uint64_t size;
  61         uint16_t fnum;
  62         enum brl_type lock_type;
  63         void *notify_ptr;
  64 };
  65
  66 struct brl_context {
  67         struct tdb_wrap *w;
  68         uint32_t server;
  69         int snum;
  70         struct messaging_context *messaging_ctx;
  71         struct lock_struct last_lock;
  72 };
  73
  74
  75 /*
  76   Open up the brlock.tdb database. Close it down using
  77   talloc_free(). We need the messaging_ctx to allow for
  78   pending lock notifications.
  79 */
  80 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, uint32_t server, int snum,
  81                              struct messaging_context *messaging_ctx)
  82 {
  83         char *path;
  84         struct brl_context *brl;
  85
  86         brl = talloc(mem_ctx, struct brl_context);
  87         if (brl == NULL) {
  88                 return NULL;
  89         }
  90
  91         path = smbd_tmp_path(brl, "brlock.tdb");
  92         brl->w = tdb_wrap_open(brl, path, 0,
  93                                TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
  94         talloc_free(path);
  95         if (brl->w == NULL) {
  96                 talloc_free(brl);
  97                 return NULL;
  98         }
  99
 100         brl->server = server;
 101         brl->snum = snum;
 102         brl->messaging_ctx = messaging_ctx;
 103         ZERO_STRUCT(brl->last_lock);
 104
 105         return brl;
 106 }
 107
 108
 109 /*
 110   see if two locking contexts are equal
 111 */
 112 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
 113 {
 114         return (ctx1->server == ctx2->server &&
 115                 ctx1->smbpid == ctx2->smbpid &&
 116                 ctx1->snum == ctx2->snum);
 117 }
 118
 119 /*
 120   see if lck1 and lck2 overlap
 121 */
 122 static BOOL brl_overlap(struct lock_struct *lck1,
 123                         struct lock_struct *lck2)
 124 {
 125         /* this extra check is not redundent - it copes with locks
 126            that go beyond the end of 64 bit file space */
 127         if (lck1->size != 0 &&
 128             lck1->start == lck2->start &&
 129             lck1->size == lck2->size) {
 130                 return True;
 131         }
 132
 133         if (lck1->start >= (lck2->start+lck2->size) ||
 134             lck2->start >= (lck1->start+lck1->size)) {
 135                 return False;
 136         }
 137         return True;
 138 }
 139
 140 /*
 141  See if lock2 can be added when lock1 is in place.
 142 */
 143 static BOOL brl_conflict(struct lock_struct *lck1,
 144                          struct lock_struct *lck2)
 145 {
 146         /* pending locks don't conflict with anything */
 147         if (lck1->lock_type >= PENDING_READ_LOCK ||
 148             lck2->lock_type >= PENDING_READ_LOCK) {
 149                 return False;
 150         }
 151
 152         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
 153                 return False;
 154         }
 155
 156         if (brl_same_context(&lck1->context, &lck2->context) &&
 157             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
 158                 return False;
 159         }
 160
 161         return brl_overlap(lck1, lck2);
 162 }
 163
 164
 165 /*
 166  Check to see if this lock conflicts, but ignore our own locks on the
 167  same fnum only.
 168 */
 169 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
 170 {
 171         /* pending locks don't conflict with anything */
 172         if (lck1->lock_type >= PENDING_READ_LOCK ||
 173             lck2->lock_type >= PENDING_READ_LOCK) {
 174                 return False;
 175         }
 176
 177         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
 178                 return False;
 179
 180         /*
 181          * note that incoming write calls conflict with existing READ
 182          * locks even if the context is the same. JRA. See LOCKTEST7
 183          * in smbtorture.
 184          */
 185         if (brl_same_context(&lck1->context, &lck2->context) &&
 186             lck1->fnum == lck2->fnum &&
 187             (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
 188                 return False;
 189         }
 190
 191         return brl_overlap(lck1, lck2);
 192 }
 193
 194
 195 /*
 196   amazingly enough, w2k3 "remembers" whether the last lock failure
 197   is the same as this one and changes its error code. I wonder if any
 198   app depends on this?
 199 */
 200 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
 201 {
 202         if (lock->context.server == brl->last_lock.context.server &&
 203             lock->context.snum == brl->last_lock.context.snum &&
 204             lock->fnum == brl->last_lock.fnum &&
 205             lock->start == brl->last_lock.start &&
 206             lock->size == brl->last_lock.size) {
 207                 return NT_STATUS_FILE_LOCK_CONFLICT;
 208         }
 209         brl->last_lock = *lock;
 210         if (lock->start >= 0xEF000000 &&
 211             (lock->start >> 63) == 0) {
 212                 /* amazing the little things you learn with a test
 213                    suite. Locks beyond this offset (as a 64 bit
 214                    number!) always generate the conflict error code,
 215                    unless the top bit is set */
 216                 return NT_STATUS_FILE_LOCK_CONFLICT;
 217         }
 218         return NT_STATUS_LOCK_NOT_GRANTED;
 219 }
 220
 221 /*
 222   Lock a range of bytes.  The lock_type can be a PENDING_*_LOCK, in
 223   which case a real lock is first tried, and if that fails then a
 224   pending lock is created. When the pending lock is triggered (by
 225   someone else closing an overlapping lock range) a messaging
 226   notification is sent, identified by the notify_ptr
 227 */
 228 NTSTATUS brl_lock(struct brl_context *brl,
 229                   DATA_BLOB *file_key,
 230                   uint16_t smbpid,
 231                   uint16_t fnum,
 232                   uint64_t start, uint64_t size,
 233                   enum brl_type lock_type,
 234                   void *notify_ptr)
 235 {
 236         TDB_DATA kbuf, dbuf;
 237         int count=0, i;
 238         struct lock_struct lock, *locks=NULL;
 239         NTSTATUS status;
 240
 241         kbuf.dptr = file_key->data;
 242         kbuf.dsize = file_key->length;
 243
 244         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 245                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 246         }
 247
 248         /* if this is a pending lock, then with the chainlock held we
 249            try to get the real lock. If we succeed then we don't need
 250            to make it pending. This prevents a possible race condition
 251            where the pending lock gets created after the lock that is
 252            preventing the real lock gets removed */
 253         if (lock_type >= PENDING_READ_LOCK) {
 254                 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
 255                 status = brl_lock(brl, file_key, smbpid, fnum, start, size, rw, NULL);
 256                 if (NT_STATUS_IS_OK(status)) {
 257                         tdb_chainunlock(brl->w->tdb, kbuf);
 258                         return NT_STATUS_OK;
 259                 }
 260         }
 261
 262         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 263
 264         lock.context.smbpid = smbpid;
 265         lock.context.server = brl->server;
 266         lock.context.snum = brl->snum;
 267         lock.start = start;
 268         lock.size = size;
 269         lock.fnum = fnum;
 270         lock.lock_type = lock_type;
 271         lock.notify_ptr = notify_ptr;
 272
 273         if (dbuf.dptr) {
 274                 /* there are existing locks - make sure they don't conflict */
 275                 locks = (struct lock_struct *)dbuf.dptr;
 276                 count = dbuf.dsize / sizeof(*locks);
 277                 for (i=0; i<count; i++) {
 278                         if (brl_conflict(&locks[i], &lock)) {
 279                                 status = brl_lock_failed(brl, &lock);
 280                                 goto fail;
 281                         }
 282                 }
 283         }
 284
 285         /* no conflicts - add it to the list of locks */
 286         locks = realloc_p(locks, struct lock_struct, count+1);
 287         if (!locks) {
 288                 status = NT_STATUS_NO_MEMORY;
 289                 goto fail;
 290         } else {
 291                 dbuf.dptr = (uint8_t *)locks;
 292         }
 293         locks[count] = lock;
 294         dbuf.dsize += sizeof(lock);
 295
 296         if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 297                 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 298                 goto fail;
 299         }
 300
 301         free(dbuf.dptr);
 302         tdb_chainunlock(brl->w->tdb, kbuf);
 303
 304         /* the caller needs to know if the real lock was granted. If
 305            we have reached here then it must be a pending lock that
 306            was granted, so tell them the lock failed */
 307         if (lock_type >= PENDING_READ_LOCK) {
 308                 return brl_lock_failed(brl, &lock);
 309         }
 310
 311         return NT_STATUS_OK;
 312
 313  fail:
 314
 315         free(dbuf.dptr);
 316         tdb_chainunlock(brl->w->tdb, kbuf);
 317         return status;
 318 }
 319
 320
 321 /*
 322   we are removing a lock that might be holding up a pending lock. Scan for pending
 323   locks that cover this range and if we find any then notify the server that it should
 324   retry the lock
 325 */
 326 static void brl_notify_unlock(struct brl_context *brl,
 327                               struct lock_struct *locks, int count,
 328                               struct lock_struct *removed_lock)
 329 {
 330         int i, last_notice;
 331
 332         /* the last_notice logic is to prevent stampeding on a lock
 333            range. It prevents us sending hundreds of notifies on the
 334            same range of bytes. It doesn't prevent all possible
 335            stampedes, but it does prevent the most common problem */
 336         last_notice = -1;
 337
 338         for (i=0;i<count;i++) {
 339                 if (locks[i].lock_type >= PENDING_READ_LOCK &&
 340                     brl_overlap(&locks[i], removed_lock)) {
 341                         if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
 342                                 continue;
 343                         }
 344                         if (locks[i].lock_type == PENDING_WRITE_LOCK) {
 345                                 last_notice = i;
 346                         }
 347                         messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
 348                                            MSG_BRL_RETRY, locks[i].notify_ptr);
 349                 }
 350         }
 351 }
 352
 353
 354 /*
 355   send notifications for all pending locks - the file is being closed by this
 356   user
 357 */
 358 static void brl_notify_all(struct brl_context *brl,
 359                            struct lock_struct *locks, int count)
 360 {
 361         int i;
 362         for (i=0;i<count;i++) {
 363                 if (locks->lock_type >= PENDING_READ_LOCK) {
 364                         brl_notify_unlock(brl, locks, count, &locks[i]);
 365                 }
 366         }
 367 }
 368
 369
 370
 371 /*
 372  Unlock a range of bytes.
 373 */
 374 NTSTATUS brl_unlock(struct brl_context *brl,
 375                     DATA_BLOB *file_key,
 376                     uint16_t smbpid,
 377                     uint16_t fnum,
 378                     uint64_t start, uint64_t size)
 379 {
 380         TDB_DATA kbuf, dbuf;
 381         int count, i;
 382         struct lock_struct *locks;
 383         struct lock_context context;
 384         NTSTATUS status;
 385
 386         kbuf.dptr = file_key->data;
 387         kbuf.dsize = file_key->length;
 388
 389         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 390                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 391         }
 392
 393         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 394         if (!dbuf.dptr) {
 395                 tdb_chainunlock(brl->w->tdb, kbuf);
 396                 return NT_STATUS_RANGE_NOT_LOCKED;
 397         }
 398
 399         context.smbpid = smbpid;
 400         context.server = brl->server;
 401         context.snum = brl->snum;
 402
 403         /* there are existing locks - find a match */
 404         locks = (struct lock_struct *)dbuf.dptr;
 405         count = dbuf.dsize / sizeof(*locks);
 406
 407         for (i=0; i<count; i++) {
 408                 struct lock_struct *lock = &locks[i];
 409
 410                 if (brl_same_context(&lock->context, &context) &&
 411                     lock->fnum == fnum &&
 412                     lock->start == start &&
 413                     lock->size == size &&
 414                     lock->notify_ptr == NULL) {
 415                         /* found it - delete it */
 416                         if (count == 1) {
 417                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 418                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 419                                         goto fail;
 420                                 }
 421                         } else {
 422                                 struct lock_struct removed_lock = *lock;
 423                                 if (i < count-1) {
 424                                         memmove(&locks[i], &locks[i+1],
 425                                                 sizeof(*locks)*((count-1) - i));
 426                                 }
 427                                 count--;
 428
 429                                 /* send notifications for any relevant pending locks */
 430                                 brl_notify_unlock(brl, locks, count, &removed_lock);
 431
 432                                 dbuf.dsize = count * sizeof(*locks);
 433
 434                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 435                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 436                                         goto fail;
 437                                 }
 438                         }
 439
 440                         free(dbuf.dptr);
 441                         tdb_chainunlock(brl->w->tdb, kbuf);
 442                         return NT_STATUS_OK;
 443                 }
 444         }
 445
 446         /* we didn't find it */
 447         status = NT_STATUS_RANGE_NOT_LOCKED;
 448
 449  fail:
 450         free(dbuf.dptr);
 451         tdb_chainunlock(brl->w->tdb, kbuf);
 452         return status;
 453 }
 454
 455
 456 /*
 457   remove a pending lock. This is called when the caller has either
 458   given up trying to establish a lock or when they have succeeded in
 459   getting it. In either case they no longer need to be notified.
 460 */
 461 NTSTATUS brl_remove_pending(struct brl_context *brl,
 462                             DATA_BLOB *file_key,
 463                             void *notify_ptr)
 464 {
 465         TDB_DATA kbuf, dbuf;
 466         int count, i;
 467         struct lock_struct *locks;
 468         NTSTATUS status;
 469
 470         kbuf.dptr = file_key->data;
 471         kbuf.dsize = file_key->length;
 472
 473         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 474                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 475         }
 476
 477         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 478         if (!dbuf.dptr) {
 479                 tdb_chainunlock(brl->w->tdb, kbuf);
 480                 return NT_STATUS_RANGE_NOT_LOCKED;
 481         }
 482
 483         /* there are existing locks - find a match */
 484         locks = (struct lock_struct *)dbuf.dptr;
 485         count = dbuf.dsize / sizeof(*locks);
 486
 487         for (i=0; i<count; i++) {
 488                 struct lock_struct *lock = &locks[i];
 489
 490                 if (lock->notify_ptr == notify_ptr &&
 491                     lock->context.server == brl->server) {
 492                         /* found it - delete it */
 493                         if (count == 1) {
 494                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 495                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 496                                         goto fail;
 497                                 }
 498                         } else {
 499                                 if (i < count-1) {
 500                                         memmove(&locks[i], &locks[i+1],
 501                                                 sizeof(*locks)*((count-1) - i));
 502                                 }
 503                                 count--;
 504                                 dbuf.dsize = count * sizeof(*locks);
 505                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 506                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 507                                         goto fail;
 508                                 }
 509                         }
 510
 511                         free(dbuf.dptr);
 512                         tdb_chainunlock(brl->w->tdb, kbuf);
 513                         return NT_STATUS_OK;
 514                 }
 515         }
 516
 517         /* we didn't find it */
 518         status = NT_STATUS_RANGE_NOT_LOCKED;
 519
 520  fail:
 521         free(dbuf.dptr);
 522         tdb_chainunlock(brl->w->tdb, kbuf);
 523         return status;
 524 }
 525
 526
 527 /*
 528   Test if we are allowed to perform IO on a region of an open file
 529 */
 530 NTSTATUS brl_locktest(struct brl_context *brl,
 531                       DATA_BLOB *file_key,
 532                       uint16_t fnum,
 533                       uint16_t smbpid,
 534                       uint64_t start, uint64_t size,
 535                       enum brl_type lock_type)
 536 {
 537         TDB_DATA kbuf, dbuf;
 538         int count, i;
 539         struct lock_struct lock, *locks;
 540
 541         kbuf.dptr = file_key->data;
 542         kbuf.dsize = file_key->length;
 543
 544         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 545         if (dbuf.dptr == NULL) {
 546                 return NT_STATUS_OK;
 547         }
 548
 549         lock.context.smbpid = smbpid;
 550         lock.context.server = brl->server;
 551         lock.context.snum = brl->snum;
 552         lock.start = start;
 553         lock.size = size;
 554         lock.fnum = fnum;
 555         lock.lock_type = lock_type;
 556
 557         /* there are existing locks - make sure they don't conflict */
 558         locks = (struct lock_struct *)dbuf.dptr;
 559         count = dbuf.dsize / sizeof(*locks);
 560
 561         for (i=0; i<count; i++) {
 562                 if (brl_conflict_other(&locks[i], &lock)) {
 563                         free(dbuf.dptr);
 564                         return NT_STATUS_FILE_LOCK_CONFLICT;
 565                 }
 566         }
 567
 568         free(dbuf.dptr);
 569         return NT_STATUS_OK;
 570 }
 571
 572
 573 /*
 574  Remove any locks associated with a open file.
 575 */
 576 NTSTATUS brl_close(struct brl_context *brl,
 577                    DATA_BLOB *file_key, int fnum)
 578 {
 579         TDB_DATA kbuf, dbuf;
 580         int count, i, dcount=0;
 581         struct lock_struct *locks;
 582         NTSTATUS status;
 583
 584         kbuf.dptr = file_key->data;
 585         kbuf.dsize = file_key->length;
 586
 587         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 588                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 589         }
 590
 591         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 592         if (!dbuf.dptr) {
 593                 tdb_chainunlock(brl->w->tdb, kbuf);
 594                 return NT_STATUS_OK;
 595         }
 596
 597         /* there are existing locks - remove any for this fnum */
 598         locks = (struct lock_struct *)dbuf.dptr;
 599         count = dbuf.dsize / sizeof(*locks);
 600
 601         for (i=0; i<count; i++) {
 602                 struct lock_struct *lock = &locks[i];
 603
 604                 if (lock->context.snum == brl->snum &&
 605                     lock->context.server == brl->server &&
 606                     lock->fnum == fnum) {
 607                         /* found it - delete it */
 608                         if (count > 1 && i < count-1) {
 609                                 memmove(&locks[i], &locks[i+1],
 610                                         sizeof(*locks)*((count-1) - i));
 611                         }
 612                         count--;
 613                         i--;
 614                         dcount++;
 615                 }
 616         }
 617
 618         status = NT_STATUS_OK;
 619
 620         if (count == 0) {
 621                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 622                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 623                 }
 624         } else if (dcount != 0) {
 625                 /* tell all pending lock holders for this file that
 626                    they have a chance now. This is a bit indiscriminant,
 627                    but works OK */
 628                 brl_notify_all(brl, locks, count);
 629
 630                 dbuf.dsize = count * sizeof(*locks);
 631
 632                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 633                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 634                 }
 635         }
 636
 637         free(dbuf.dptr);
 638         tdb_chainunlock(brl->w->tdb, kbuf);
 639
 640         return status;
 641 }
 642