source4/ntvfs/common/brlock.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3
   4    generic byte range locking code
   5
   6    Copyright (C) Andrew Tridgell 1992-2004
   7    Copyright (C) Jeremy Allison 1992-2000
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 /* This module implements a tdb based byte range locking service,
  25    replacing the fcntl() based byte range locking previously
  26    used. This allows us to provide the same semantics as NT */
  27
  28 #include "includes.h"
  29 #include "system/filesys.h"
  30 #include "lib/tdb/include/tdb.h"
  31 #include "messages.h"
  32
  33 /*
  34   in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  35   a file. For a local posix filesystem this will usually be a combination
  36   of the device and inode numbers of the file, but it can be anything
  37   that uniquely identifies a file for locking purposes, as long
  38   as it is applied consistently.
  39 */
  40
  41 /*
  42   the lock context contains the elements that define whether one
  43   lock is the same as another lock
  44 */
  45 struct lock_context {
  46         uint32_t server;
  47         uint16_t smbpid;
  48         uint16_t tid;
  49 };
  50
  51 /* The data in brlock records is an unsorted linear array of these
  52    records.  It is unnecessary to store the count as tdb provides the
  53    size of the record */
  54 struct lock_struct {
  55         struct lock_context context;
  56         uint64_t start;
  57         uint64_t size;
  58         uint16_t fnum;
  59         enum brl_type lock_type;
  60         void *notify_ptr;
  61 };
  62
  63 struct brl_context {
  64         struct tdb_wrap *w;
  65         uint32_t server;
  66         uint16_t tid;
  67         struct messaging_context *messaging_ctx;
  68         struct lock_struct last_lock;
  69 };
  70
  71
  72 /*
  73   Open up the brlock.tdb database. Close it down using
  74   talloc_free(). We need the messaging_ctx to allow for
  75   pending lock notifications.
  76 */
  77 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, uint32_t server, uint16_t tid,
  78                              struct messaging_context *messaging_ctx)
  79 {
  80         char *path;
  81         struct brl_context *brl;
  82
  83         brl = talloc(mem_ctx, struct brl_context);
  84         if (brl == NULL) {
  85                 return NULL;
  86         }
  87
  88         path = smbd_tmp_path(brl, "brlock.tdb");
  89         brl->w = tdb_wrap_open(brl, path, 0,
  90                                TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
  91         talloc_free(path);
  92         if (brl->w == NULL) {
  93                 talloc_free(brl);
  94                 return NULL;
  95         }
  96
  97         brl->server = server;
  98         brl->tid = tid;
  99         brl->messaging_ctx = messaging_ctx;
 100         ZERO_STRUCT(brl->last_lock);
 101
 102         return brl;
 103 }
 104
 105
 106 /*
 107   see if two locking contexts are equal
 108 */
 109 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
 110 {
 111         return (ctx1->server == ctx2->server &&
 112                 ctx1->smbpid == ctx2->smbpid &&
 113                 ctx1->tid == ctx2->tid);
 114 }
 115
 116 /*
 117   see if lck1 and lck2 overlap
 118 */
 119 static BOOL brl_overlap(struct lock_struct *lck1,
 120                         struct lock_struct *lck2)
 121 {
 122         /* this extra check is not redundent - it copes with locks
 123            that go beyond the end of 64 bit file space */
 124         if (lck1->size != 0 &&
 125             lck1->start == lck2->start &&
 126             lck1->size == lck2->size) {
 127                 return True;
 128         }
 129
 130         if (lck1->start >= (lck2->start+lck2->size) ||
 131             lck2->start >= (lck1->start+lck1->size)) {
 132                 return False;
 133         }
 134         return True;
 135 }
 136
 137 /*
 138  See if lock2 can be added when lock1 is in place.
 139 */
 140 static BOOL brl_conflict(struct lock_struct *lck1,
 141                          struct lock_struct *lck2)
 142 {
 143         /* pending locks don't conflict with anything */
 144         if (lck1->lock_type >= PENDING_READ_LOCK ||
 145             lck2->lock_type >= PENDING_READ_LOCK) {
 146                 return False;
 147         }
 148
 149         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
 150                 return False;
 151         }
 152
 153         if (brl_same_context(&lck1->context, &lck2->context) &&
 154             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
 155                 return False;
 156         }
 157
 158         return brl_overlap(lck1, lck2);
 159 }
 160
 161
 162 /*
 163  Check to see if this lock conflicts, but ignore our own locks on the
 164  same fnum only.
 165 */
 166 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
 167 {
 168         /* pending locks don't conflict with anything */
 169         if (lck1->lock_type >= PENDING_READ_LOCK ||
 170             lck2->lock_type >= PENDING_READ_LOCK) {
 171                 return False;
 172         }
 173
 174         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
 175                 return False;
 176
 177         /*
 178          * note that incoming write calls conflict with existing READ
 179          * locks even if the context is the same. JRA. See LOCKTEST7
 180          * in smbtorture.
 181          */
 182         if (brl_same_context(&lck1->context, &lck2->context) &&
 183             lck1->fnum == lck2->fnum &&
 184             (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
 185                 return False;
 186         }
 187
 188         return brl_overlap(lck1, lck2);
 189 }
 190
 191
 192 /*
 193   amazingly enough, w2k3 "remembers" whether the last lock failure
 194   is the same as this one and changes its error code. I wonder if any
 195   app depends on this?
 196 */
 197 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
 198 {
 199         if (lock->context.server == brl->last_lock.context.server &&
 200             lock->context.tid == brl->last_lock.context.tid &&
 201             lock->fnum == brl->last_lock.fnum &&
 202             lock->start == brl->last_lock.start &&
 203             lock->size == brl->last_lock.size) {
 204                 return NT_STATUS_FILE_LOCK_CONFLICT;
 205         }
 206         brl->last_lock = *lock;
 207         if (lock->start >= 0xEF000000 &&
 208             (lock->start >> 63) == 0) {
 209                 /* amazing the little things you learn with a test
 210                    suite. Locks beyond this offset (as a 64 bit
 211                    number!) always generate the conflict error code,
 212                    unless the top bit is set */
 213                 return NT_STATUS_FILE_LOCK_CONFLICT;
 214         }
 215         return NT_STATUS_LOCK_NOT_GRANTED;
 216 }
 217
 218 /*
 219   Lock a range of bytes.  The lock_type can be a PENDING_*_LOCK, in
 220   which case a real lock is first tried, and if that fails then a
 221   pending lock is created. When the pending lock is triggered (by
 222   someone else closing an overlapping lock range) a messaging
 223   notification is sent, identified by the notify_ptr
 224 */
 225 NTSTATUS brl_lock(struct brl_context *brl,
 226                   DATA_BLOB *file_key,
 227                   uint16_t smbpid,
 228                   uint16_t fnum,
 229                   uint64_t start, uint64_t size,
 230                   enum brl_type lock_type,
 231                   void *notify_ptr)
 232 {
 233         TDB_DATA kbuf, dbuf;
 234         int count=0, i;
 235         struct lock_struct lock, *locks=NULL;
 236         NTSTATUS status;
 237
 238         kbuf.dptr = (char *)file_key->data;
 239         kbuf.dsize = file_key->length;
 240
 241         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 242                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 243         }
 244
 245         /* if this is a pending lock, then with the chainlock held we
 246            try to get the real lock. If we succeed then we don't need
 247            to make it pending. This prevents a possible race condition
 248            where the pending lock gets created after the lock that is
 249            preventing the real lock gets removed */
 250         if (lock_type >= PENDING_READ_LOCK) {
 251                 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
 252                 status = brl_lock(brl, file_key, smbpid, fnum, start, size, rw, NULL);
 253                 if (NT_STATUS_IS_OK(status)) {
 254                         tdb_chainunlock(brl->w->tdb, kbuf);
 255                         return NT_STATUS_OK;
 256                 }
 257         }
 258
 259         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 260
 261         lock.context.smbpid = smbpid;
 262         lock.context.server = brl->server;
 263         lock.context.tid = brl->tid;
 264         lock.start = start;
 265         lock.size = size;
 266         lock.fnum = fnum;
 267         lock.lock_type = lock_type;
 268         lock.notify_ptr = notify_ptr;
 269
 270         if (dbuf.dptr) {
 271                 /* there are existing locks - make sure they don't conflict */
 272                 locks = (struct lock_struct *)dbuf.dptr;
 273                 count = dbuf.dsize / sizeof(*locks);
 274                 for (i=0; i<count; i++) {
 275                         if (brl_conflict(&locks[i], &lock)) {
 276                                 status = brl_lock_failed(brl, &lock);
 277                                 goto fail;
 278                         }
 279                 }
 280         }
 281
 282         /* no conflicts - add it to the list of locks */
 283         locks = realloc_p(locks, struct lock_struct, count+1);
 284         if (!locks) {
 285                 status = NT_STATUS_NO_MEMORY;
 286                 goto fail;
 287         } else {
 288                 dbuf.dptr = (char *)locks;
 289         }
 290         locks[count] = lock;
 291         dbuf.dsize += sizeof(lock);
 292
 293         if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 294                 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 295                 goto fail;
 296         }
 297
 298         free(dbuf.dptr);
 299         tdb_chainunlock(brl->w->tdb, kbuf);
 300
 301         /* the caller needs to know if the real lock was granted. If
 302            we have reached here then it must be a pending lock that
 303            was granted, so tell them the lock failed */
 304         if (lock_type >= PENDING_READ_LOCK) {
 305                 return brl_lock_failed(brl, &lock);
 306         }
 307
 308         return NT_STATUS_OK;
 309
 310  fail:
 311
 312         free(dbuf.dptr);
 313         tdb_chainunlock(brl->w->tdb, kbuf);
 314         return status;
 315 }
 316
 317
 318 /*
 319   we are removing a lock that might be holding up a pending lock. Scan for pending
 320   locks that cover this range and if we find any then notify the server that it should
 321   retry the lock
 322 */
 323 static void brl_notify_unlock(struct brl_context *brl,
 324                               struct lock_struct *locks, int count,
 325                               struct lock_struct *removed_lock)
 326 {
 327         int i, last_notice;
 328
 329         /* the last_notice logic is to prevent stampeding on a lock
 330            range. It prevents us sending hundreds of notifies on the
 331            same range of bytes. It doesn't prevent all possible
 332            stampedes, but it does prevent the most common problem */
 333         last_notice = -1;
 334
 335         for (i=0;i<count;i++) {
 336                 if (locks[i].lock_type >= PENDING_READ_LOCK &&
 337                     brl_overlap(&locks[i], removed_lock)) {
 338                         if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
 339                                 continue;
 340                         }
 341                         if (locks[i].lock_type == PENDING_WRITE_LOCK) {
 342                                 last_notice = i;
 343                         }
 344                         messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
 345                                            MSG_BRL_RETRY, locks[i].notify_ptr);
 346                 }
 347         }
 348 }
 349
 350
 351 /*
 352   send notifications for all pending locks - the file is being closed by this
 353   user
 354 */
 355 static void brl_notify_all(struct brl_context *brl,
 356                            struct lock_struct *locks, int count)
 357 {
 358         int i;
 359         for (i=0;i<count;i++) {
 360                 if (locks->lock_type >= PENDING_READ_LOCK) {
 361                         brl_notify_unlock(brl, locks, count, &locks[i]);
 362                 }
 363         }
 364 }
 365
 366
 367
 368 /*
 369  Unlock a range of bytes.
 370 */
 371 NTSTATUS brl_unlock(struct brl_context *brl,
 372                     DATA_BLOB *file_key,
 373                     uint16_t smbpid,
 374                     uint16_t fnum,
 375                     uint64_t start, uint64_t size)
 376 {
 377         TDB_DATA kbuf, dbuf;
 378         int count, i;
 379         struct lock_struct *locks;
 380         struct lock_context context;
 381         NTSTATUS status;
 382
 383         kbuf.dptr = (char *)file_key->data;
 384         kbuf.dsize = file_key->length;
 385
 386         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 387                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 388         }
 389
 390         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 391         if (!dbuf.dptr) {
 392                 tdb_chainunlock(brl->w->tdb, kbuf);
 393                 return NT_STATUS_RANGE_NOT_LOCKED;
 394         }
 395
 396         context.smbpid = smbpid;
 397         context.server = brl->server;
 398         context.tid = brl->tid;
 399
 400         /* there are existing locks - find a match */
 401         locks = (struct lock_struct *)dbuf.dptr;
 402         count = dbuf.dsize / sizeof(*locks);
 403
 404         for (i=0; i<count; i++) {
 405                 struct lock_struct *lock = &locks[i];
 406
 407                 if (brl_same_context(&lock->context, &context) &&
 408                     lock->fnum == fnum &&
 409                     lock->start == start &&
 410                     lock->size == size &&
 411                     lock->notify_ptr == NULL) {
 412                         /* found it - delete it */
 413                         if (count == 1) {
 414                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 415                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 416                                         goto fail;
 417                                 }
 418                         } else {
 419                                 struct lock_struct removed_lock = *lock;
 420                                 if (i < count-1) {
 421                                         memmove(&locks[i], &locks[i+1],
 422                                                 sizeof(*locks)*((count-1) - i));
 423                                 }
 424                                 count--;
 425
 426                                 /* send notifications for any relevant pending locks */
 427                                 brl_notify_unlock(brl, locks, count, &removed_lock);
 428
 429                                 dbuf.dsize = count * sizeof(*locks);
 430
 431                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 432                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 433                                         goto fail;
 434                                 }
 435                         }
 436
 437                         free(dbuf.dptr);
 438                         tdb_chainunlock(brl->w->tdb, kbuf);
 439                         return NT_STATUS_OK;
 440                 }
 441         }
 442
 443         /* we didn't find it */
 444         status = NT_STATUS_RANGE_NOT_LOCKED;
 445
 446  fail:
 447         free(dbuf.dptr);
 448         tdb_chainunlock(brl->w->tdb, kbuf);
 449         return status;
 450 }
 451
 452
 453 /*
 454   remove a pending lock. This is called when the caller has either
 455   given up trying to establish a lock or when they have succeeded in
 456   getting it. In either case they no longer need to be notified.
 457 */
 458 NTSTATUS brl_remove_pending(struct brl_context *brl,
 459                             DATA_BLOB *file_key,
 460                             void *notify_ptr)
 461 {
 462         TDB_DATA kbuf, dbuf;
 463         int count, i;
 464         struct lock_struct *locks;
 465         NTSTATUS status;
 466
 467         kbuf.dptr = (char *)file_key->data;
 468         kbuf.dsize = file_key->length;
 469
 470         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 471                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 472         }
 473
 474         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 475         if (!dbuf.dptr) {
 476                 tdb_chainunlock(brl->w->tdb, kbuf);
 477                 return NT_STATUS_RANGE_NOT_LOCKED;
 478         }
 479
 480         /* there are existing locks - find a match */
 481         locks = (struct lock_struct *)dbuf.dptr;
 482         count = dbuf.dsize / sizeof(*locks);
 483
 484         for (i=0; i<count; i++) {
 485                 struct lock_struct *lock = &locks[i];
 486
 487                 if (lock->notify_ptr == notify_ptr &&
 488                     lock->context.server == brl->server) {
 489                         /* found it - delete it */
 490                         if (count == 1) {
 491                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 492                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 493                                         goto fail;
 494                                 }
 495                         } else {
 496                                 if (i < count-1) {
 497                                         memmove(&locks[i], &locks[i+1],
 498                                                 sizeof(*locks)*((count-1) - i));
 499                                 }
 500                                 count--;
 501                                 dbuf.dsize = count * sizeof(*locks);
 502                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 503                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 504                                         goto fail;
 505                                 }
 506                         }
 507
 508                         free(dbuf.dptr);
 509                         tdb_chainunlock(brl->w->tdb, kbuf);
 510                         return NT_STATUS_OK;
 511                 }
 512         }
 513
 514         /* we didn't find it */
 515         status = NT_STATUS_RANGE_NOT_LOCKED;
 516
 517  fail:
 518         free(dbuf.dptr);
 519         tdb_chainunlock(brl->w->tdb, kbuf);
 520         return status;
 521 }
 522
 523
 524 /*
 525   Test if we are allowed to perform IO on a region of an open file
 526 */
 527 NTSTATUS brl_locktest(struct brl_context *brl,
 528                       DATA_BLOB *file_key,
 529                       uint16_t fnum,
 530                       uint16_t smbpid,
 531                       uint64_t start, uint64_t size,
 532                       enum brl_type lock_type)
 533 {
 534         TDB_DATA kbuf, dbuf;
 535         int count, i;
 536         struct lock_struct lock, *locks;
 537
 538         kbuf.dptr = (char *)file_key->data;
 539         kbuf.dsize = file_key->length;
 540
 541         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 542         if (dbuf.dptr == NULL) {
 543                 return NT_STATUS_OK;
 544         }
 545
 546         lock.context.smbpid = smbpid;
 547         lock.context.server = brl->server;
 548         lock.context.tid = brl->tid;
 549         lock.start = start;
 550         lock.size = size;
 551         lock.fnum = fnum;
 552         lock.lock_type = lock_type;
 553
 554         /* there are existing locks - make sure they don't conflict */
 555         locks = (struct lock_struct *)dbuf.dptr;
 556         count = dbuf.dsize / sizeof(*locks);
 557
 558         for (i=0; i<count; i++) {
 559                 if (brl_conflict_other(&locks[i], &lock)) {
 560                         free(dbuf.dptr);
 561                         return NT_STATUS_FILE_LOCK_CONFLICT;
 562                 }
 563         }
 564
 565         free(dbuf.dptr);
 566         return NT_STATUS_OK;
 567 }
 568
 569
 570 /*
 571  Remove any locks associated with a open file.
 572 */
 573 NTSTATUS brl_close(struct brl_context *brl,
 574                    DATA_BLOB *file_key, int fnum)
 575 {
 576         TDB_DATA kbuf, dbuf;
 577         int count, i, dcount=0;
 578         struct lock_struct *locks;
 579         NTSTATUS status;
 580
 581         kbuf.dptr = (char *)file_key->data;
 582         kbuf.dsize = file_key->length;
 583
 584         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 585                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 586         }
 587
 588         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 589         if (!dbuf.dptr) {
 590                 tdb_chainunlock(brl->w->tdb, kbuf);
 591                 return NT_STATUS_OK;
 592         }
 593
 594         /* there are existing locks - remove any for this fnum */
 595         locks = (struct lock_struct *)dbuf.dptr;
 596         count = dbuf.dsize / sizeof(*locks);
 597
 598         for (i=0; i<count; i++) {
 599                 struct lock_struct *lock = &locks[i];
 600
 601                 if (lock->context.tid == brl->tid &&
 602                     lock->context.server == brl->server &&
 603                     lock->fnum == fnum) {
 604                         /* found it - delete it */
 605                         if (count > 1 && i < count-1) {
 606                                 memmove(&locks[i], &locks[i+1],
 607                                         sizeof(*locks)*((count-1) - i));
 608                         }
 609                         count--;
 610                         i--;
 611                         dcount++;
 612                 }
 613         }
 614
 615         status = NT_STATUS_OK;
 616
 617         if (count == 0) {
 618                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 619                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 620                 }
 621         } else if (dcount != 0) {
 622                 /* tell all pending lock holders for this file that
 623                    they have a chance now. This is a bit indiscriminant,
 624                    but works OK */
 625                 brl_notify_all(brl, locks, count);
 626
 627                 dbuf.dsize = count * sizeof(*locks);
 628
 629                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 630                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 631                 }
 632         }
 633
 634         free(dbuf.dptr);
 635         tdb_chainunlock(brl->w->tdb, kbuf);
 636
 637         return status;
 638 }
 639