source4/ntvfs/common/brlock.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3
   4    generic byte range locking code
   5
   6    Copyright (C) Andrew Tridgell 1992-2004
   7    Copyright (C) Jeremy Allison 1992-2000
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 /* This module implements a tdb based byte range locking service,
  25    replacing the fcntl() based byte range locking previously
  26    used. This allows us to provide the same semantics as NT */
  27
  28 #include "includes.h"
  29
  30 /*
  31   in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  32   a file. For a local posix filesystem this will usually be a combination
  33   of the device and inode numbers of the file, but it can be anything
  34   that uniquely identifies a file for locking purposes, as long
  35   as it is applied consistently.
  36 */
  37
  38 /*
  39   the lock context contains the elements that define whether one
  40   lock is the same as another lock
  41 */
  42 struct lock_context {
  43         servid_t server;   /* brl_lock()/brl_unlock()/brl_locktest() fill this from brl_context.server */
  44         uint16_t smbpid;   /* the smbpid argument passed in by the caller of those functions */
  45         uint16_t tid;      /* filled from brl_context.tid */
  46 };
  47
  48 /* The data in brlock records is an unsorted linear array of these
  49    records.  It is unnecessary to store the count as tdb provides the
  50    size of the record */
  51 struct lock_struct {
  52         struct lock_context context;   /* who owns the lock; compared with brl_same_context() */
  53         uint64_t start;                /* first byte offset of the range */
  54         uint64_t size;                 /* length of the range; brl_overlap() works on start and start+size */
  55         uint16_t fnum;                 /* file number the lock belongs to; brl_close() removes entries by fnum */
  56         enum brl_type lock_type;       /* values >= PENDING_READ_LOCK are pending entries and never conflict */
  57         void *notify_ptr;              /* value sent with MSG_BRL_RETRY; brl_unlock() only removes entries where this is NULL */
  58 };
  59
  60 struct brl_context {                 /* state behind the void * handed out by brl_init() */
  61         struct tdb_wrap *w;          /* brlock.tdb handle opened in brl_init() */
  62         servid_t server;             /* copied into the context of every lock this handle creates */
  63         uint16_t tid;                /* copied into the context of every lock this handle creates */
  64         void *messaging_ctx;         /* passed to messaging_send() when notifying pending locks */
  65         struct lock_struct last_lock_failure;   /* last failed request, consulted and updated by brl_lock_failed() */
  66 };
  67
  68
  69 /*
  70   Open up the brlock.tdb database. Close it down using
  71   talloc_free(). We need the messaging_ctx to allow for
  72   pending lock notifications.
  73 */
  74 void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid,
  75                void *messaging_ctx)
  76 {
  77         char *path;
  78         struct brl_context *brl;
  79
  80         brl = talloc_p(mem_ctx, struct brl_context);
  81         if (brl == NULL) {
  82                 return NULL;
  83         }
  84
  85         path = lock_path(brl, "brlock.tdb");
  86         brl->w = tdb_wrap_open(brl, path, 0,
  87                                TDB_DEFAULT|TDB_CLEAR_IF_FIRST,
  88                                O_RDWR|O_CREAT, 0600);
  89         talloc_free(path);
  90         if (brl->w == NULL) {
  91                 talloc_free(brl);
  92                 return NULL;
  93         }
  94
  95         brl->server = server;
  96         brl->tid = tid;
  97         brl->messaging_ctx = messaging_ctx;
  98         ZERO_STRUCT(brl->last_lock_failure);
  99
 100         return (void *)brl;
 101 }
 102
 103
 104 /*
 105   see if two locking contexts are equal
 106 */
 107 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
 108 {
 109         return (ctx1->server == ctx2->server &&
 110                 ctx1->smbpid == ctx2->smbpid &&
 111                 ctx1->tid == ctx2->tid);
 112 }
 113
 114 /*
 115   see if lck1 and lck2 overlap
 116 */
 117 static BOOL brl_overlap(struct lock_struct *lck1,
 118                         struct lock_struct *lck2)
 119 {
 120         if (lck1->start >= (lck2->start + lck2->size) ||
 121             lck2->start >= (lck1->start + lck1->size)) {
 122                 return False;
 123         }
 124         return True;
 125 }
 126
 127 /*
 128  See if lock2 can be added when lock1 is in place.
 129 */
 130 static BOOL brl_conflict(struct lock_struct *lck1,
 131                          struct lock_struct *lck2)
 132 {
 133         /* pending locks don't conflict with anything */
 134         if (lck1->lock_type >= PENDING_READ_LOCK ||
 135             lck2->lock_type >= PENDING_READ_LOCK) {
 136                 return False;
 137         }
 138
 139         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
 140                 return False;
 141         }
 142
 143         if (brl_same_context(&lck1->context, &lck2->context) &&
 144             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
 145                 return False;
 146         }
 147
 148         return brl_overlap(lck1, lck2);
 149 }
 150
 151
 152 /*
 153  Check to see if this lock conflicts, but ignore our own locks on the
 154  same fnum only.
 155 */
 156 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
 157 {
 158         /* pending locks don't conflict with anything */
 159         if (lck1->lock_type >= PENDING_READ_LOCK ||
 160             lck2->lock_type >= PENDING_READ_LOCK) {
 161                 return False;
 162         }
 163
 164         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
 165                 return False;
 166
 167         /*
 168          * note that incoming write calls conflict with existing READ
 169          * locks even if the context is the same. JRA. See LOCKTEST7
 170          * in smbtorture.
 171          */
 172         if (brl_same_context(&lck1->context, &lck2->context) &&
 173             lck1->fnum == lck2->fnum &&
 174             (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
 175                 return False;
 176         }
 177
 178         return brl_overlap(lck1, lck2);
 179 }
 180
 181
 182 /*
 183   amazingly enough, w2k3 "remembers" whether the last lock failure
 184   is the same as this one and changes its error code. I wonder if any
 185   app depends on this?
 186 */
 187 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
 188 {
 189         if (brl_same_context(&lock->context, &brl->last_lock_failure.context) &&
 190             lock->fnum == brl->last_lock_failure.fnum &&
 191             lock->start == brl->last_lock_failure.start &&
 192             lock->size == brl->last_lock_failure.size) {
 193                 return NT_STATUS_FILE_LOCK_CONFLICT;
 194         }
 195         brl->last_lock_failure = *lock;
 196         if (lock->start >= 0xEF000000) {
 197                 /* amazing the little things you learn with a test
 198                    suite. Locks beyond this offset (as a 64 bit
 199                    number!) always generate the conflict error
 200                    code. */
 201                 return NT_STATUS_FILE_LOCK_CONFLICT;
 202         }
 203         return NT_STATUS_LOCK_NOT_GRANTED;
 204 }
 205
 206 /*
 207   Lock a range of bytes.  The lock_type can be a PENDING_*_LOCK, in
 208   which case a real lock is first tried, and if that fails then a
 209   pending lock is created. When the pending lock is triggered (by
 210   someone else closing an overlapping lock range) a messaging
 211   notification is sent, identified by the notify_ptr
 212 */
 213 NTSTATUS brl_lock(void *brl_ctx,
 214                   DATA_BLOB *file_key,
 215                   uint16_t smbpid,
 216                   uint16_t fnum,
 217                   uint64_t start, uint64_t size,
 218                   enum brl_type lock_type,
 219                   void *notify_ptr)
 220 {
 221         struct brl_context *brl = brl_ctx;
 222         TDB_DATA kbuf, dbuf;
 223         int count, i;
 224         struct lock_struct lock, *locks;
 225         char *tp;
 226         NTSTATUS status;
 227
 228         kbuf.dptr = file_key->data;
 229         kbuf.dsize = file_key->length;
 230
 231         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 232                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 233         }
 234
 235         /* if this is a pending lock, then with the chainlock held we
 236            try to get the real lock. If we succeed then we don't need
 237            to make it pending. This prevents a possible race condition
 238            where the pending lock gets created after the lock that is
 239            preventing the real lock gets removed */
 240         if (lock_type >= PENDING_READ_LOCK) {
 241                 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
 242                 status = brl_lock(brl_ctx, file_key, smbpid, fnum, start, size, rw, NULL);
 243                 if (NT_STATUS_IS_OK(status)) {
 244                         tdb_chainunlock(brl->w->tdb, kbuf);
 245                         return NT_STATUS_OK;
 246                 }
 247         }
 248
 249         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 250
 251         lock.context.smbpid = smbpid;
 252         lock.context.server = brl->server;
 253         lock.context.tid = brl->tid;
 254         lock.start = start;
 255         lock.size = size;
 256         lock.fnum = fnum;
 257         lock.lock_type = lock_type;
 258         lock.notify_ptr = notify_ptr;
 259
 260         if (dbuf.dptr) {
 261                 /* there are existing locks - make sure they don't conflict */
 262                 locks = (struct lock_struct *)dbuf.dptr;
 263                 count = dbuf.dsize / sizeof(*locks);
 264                 for (i=0; i<count; i++) {
 265                         if (brl_conflict(&locks[i], &lock)) {
 266                                 status = brl_lock_failed(brl, &lock);
 267                                 goto fail;
 268                         }
 269                 }
 270         }
 271
 272         /* no conflicts - add it to the list of locks */
 273         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*locks));
 274         if (!tp) {
 275                 status = NT_STATUS_NO_MEMORY;
 276                 goto fail;
 277         } else {
 278                 dbuf.dptr = tp;
 279         }
 280         memcpy(dbuf.dptr + dbuf.dsize, &lock, sizeof(lock));
 281         dbuf.dsize += sizeof(lock);
 282
 283         if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 284                 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 285                 goto fail;
 286         }
 287
 288         free(dbuf.dptr);
 289         tdb_chainunlock(brl->w->tdb, kbuf);
 290
 291         /* the caller needs to know if the real lock was granted. If
 292            we have reached here then it must be a pending lock that
 293            was granted, so tell them the lock failed */
 294         if (lock_type >= PENDING_READ_LOCK) {
 295                 return brl_lock_failed(brl, &lock);
 296         }
 297
 298         return NT_STATUS_OK;
 299
 300  fail:
 301
 302         free(dbuf.dptr);
 303         tdb_chainunlock(brl->w->tdb, kbuf);
 304         return status;
 305 }
 306
 307
 308 /*
 309   we are removing a lock that might be holding up a pending lock. Scan for pending
 310   locks that cover this range and if we find any then notify the server that it should
 311   retry the lock
 312 */
 313 static void brl_notify_unlock(struct brl_context *brl,
 314                               struct lock_struct *locks, int count,
 315                               struct lock_struct *removed_lock)
 316 {
 317         int i, last_notice;
 318
 319         /* the last_notice logic is to prevent stampeding on a lock
 320            range. It prevents us sending hundreds of notifies on the
 321            same range of bytes. It doesn't prevent all possible
 322            stampedes, but it does prevent the most common problem */
 323         last_notice = -1;
 324
 325         for (i=0;i<count;i++) {
 326                 if (locks[i].lock_type >= PENDING_READ_LOCK &&
 327                     brl_overlap(&locks[i], removed_lock)) {
 328                         DATA_BLOB data;
 329
 330                         if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
 331                                 continue;
 332                         }
 333                         last_notice = i;
 334                         data.data = (void *)&locks[i].notify_ptr;
 335                         data.length = sizeof(void *);
 336                         messaging_send(brl->messaging_ctx, locks[i].context.server, MSG_BRL_RETRY, &data);
 337                 }
 338         }
 339 }
 340
 341
 342 /*
 343   send notifications for all pending locks - the file is being closed by this
 344   user
 345 */
 346 static void brl_notify_all(struct brl_context *brl,
 347                            struct lock_struct *locks, int count)
 348 {
 349         int i;
 350         for (i=0;i<count;i++) {
 351                 if (locks->lock_type >= PENDING_READ_LOCK) {
 352                         brl_notify_unlock(brl, locks, count, &locks[i]);
 353                 }
 354         }
 355 }
 356
 357
 358
 359 /*
 360  Unlock a range of bytes.
 361 */
 362 NTSTATUS brl_unlock(void *brl_ctx,
 363                     DATA_BLOB *file_key,
 364                     uint16_t smbpid,
 365                     uint16_t fnum,
 366                     uint64_t start, uint64_t size)
 367 {
 368         struct brl_context *brl = brl_ctx;
 369         TDB_DATA kbuf, dbuf;
 370         int count, i;
 371         struct lock_struct *locks;
 372         struct lock_context context;
 373         NTSTATUS status;
 374
 375         kbuf.dptr = file_key->data;
 376         kbuf.dsize = file_key->length;
 377
 378         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 379                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 380         }
 381
 382         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 383         if (!dbuf.dptr) {
 384                 tdb_chainunlock(brl->w->tdb, kbuf);
 385                 return NT_STATUS_RANGE_NOT_LOCKED;
 386         }
 387
 388         context.smbpid = smbpid;
 389         context.server = brl->server;
 390         context.tid = brl->tid;
 391
 392         /* there are existing locks - find a match */
 393         locks = (struct lock_struct *)dbuf.dptr;
 394         count = dbuf.dsize / sizeof(*locks);
 395
 396         for (i=0; i<count; i++) {
 397                 struct lock_struct *lock = &locks[i];
 398
 399                 if (brl_same_context(&lock->context, &context) &&
 400                     lock->fnum == fnum &&
 401                     lock->start == start &&
 402                     lock->size == size &&
 403                     lock->notify_ptr == NULL) {
 404                         /* found it - delete it */
 405                         if (count == 1) {
 406                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 407                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 408                                         goto fail;
 409                                 }
 410                         } else {
 411                                 struct lock_struct removed_lock = *lock;
 412                                 if (i < count-1) {
 413                                         memmove(&locks[i], &locks[i+1],
 414                                                 sizeof(*locks)*((count-1) - i));
 415                                 }
 416                                 count--;
 417
 418                                 /* send notifications for any relevant pending locks */
 419                                 brl_notify_unlock(brl, locks, count, &removed_lock);
 420
 421                                 dbuf.dsize = count * sizeof(*locks);
 422
 423                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 424                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 425                                         goto fail;
 426                                 }
 427                         }
 428
 429                         free(dbuf.dptr);
 430                         tdb_chainunlock(brl->w->tdb, kbuf);
 431                         return NT_STATUS_OK;
 432                 }
 433         }
 434
 435         /* we didn't find it */
 436         status = NT_STATUS_RANGE_NOT_LOCKED;
 437
 438  fail:
 439         free(dbuf.dptr);
 440         tdb_chainunlock(brl->w->tdb, kbuf);
 441         return status;
 442 }
 443
 444
 445 /*
 446   remove a pending lock. This is called when the caller has either
 447   given up trying to establish a lock or when they have succeeded in
 448   getting it. In either case they no longer need to be notified.
 449 */
 450 NTSTATUS brl_remove_pending(void *brl_ctx,
 451                             DATA_BLOB *file_key,
 452                             void *notify_ptr)
 453 {
 454         struct brl_context *brl = brl_ctx;
 455         TDB_DATA kbuf, dbuf;
 456         int count, i;
 457         struct lock_struct *locks;
 458         NTSTATUS status;
 459
 460         kbuf.dptr = file_key->data;
 461         kbuf.dsize = file_key->length;
 462
 463         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 464                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 465         }
 466
 467         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 468         if (!dbuf.dptr) {
 469                 tdb_chainunlock(brl->w->tdb, kbuf);
 470                 return NT_STATUS_RANGE_NOT_LOCKED;
 471         }
 472
 473         /* there are existing locks - find a match */
 474         locks = (struct lock_struct *)dbuf.dptr;
 475         count = dbuf.dsize / sizeof(*locks);
 476
 477         for (i=0; i<count; i++) {
 478                 struct lock_struct *lock = &locks[i];
 479
 480                 if (lock->notify_ptr == notify_ptr &&
 481                     lock->context.server == brl->server) {
 482                         /* found it - delete it */
 483                         if (count == 1) {
 484                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 485                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 486                                         goto fail;
 487                                 }
 488                         } else {
 489                                 if (i < count-1) {
 490                                         memmove(&locks[i], &locks[i+1],
 491                                                 sizeof(*locks)*((count-1) - i));
 492                                 }
 493                                 count--;
 494                                 dbuf.dsize = count * sizeof(*locks);
 495                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 496                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 497                                         goto fail;
 498                                 }
 499                         }
 500
 501                         free(dbuf.dptr);
 502                         tdb_chainunlock(brl->w->tdb, kbuf);
 503                         return NT_STATUS_OK;
 504                 }
 505         }
 506
 507         /* we didn't find it */
 508         status = NT_STATUS_RANGE_NOT_LOCKED;
 509
 510  fail:
 511         free(dbuf.dptr);
 512         tdb_chainunlock(brl->w->tdb, kbuf);
 513         return status;
 514 }
 515
 516
 517 /*
 518   Test if we are allowed to perform IO on a region of an open file
 519 */
 520 NTSTATUS brl_locktest(void *brl_ctx,
 521                       DATA_BLOB *file_key,
 522                       uint16_t fnum,
 523                       uint16 smbpid,
 524                       uint64_t start, uint64_t size,
 525                       enum brl_type lock_type)
 526 {
 527         struct brl_context *brl = brl_ctx;
 528         TDB_DATA kbuf, dbuf;
 529         int count, i;
 530         struct lock_struct lock, *locks;
 531
 532         kbuf.dptr = file_key->data;
 533         kbuf.dsize = file_key->length;
 534
 535         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 536         if (dbuf.dptr == NULL) {
 537                 return NT_STATUS_OK;
 538         }
 539
 540         lock.context.smbpid = smbpid;
 541         lock.context.server = brl->server;
 542         lock.context.tid = brl->tid;
 543         lock.start = start;
 544         lock.size = size;
 545         lock.fnum = fnum;
 546         lock.lock_type = lock_type;
 547
 548         /* there are existing locks - make sure they don't conflict */
 549         locks = (struct lock_struct *)dbuf.dptr;
 550         count = dbuf.dsize / sizeof(*locks);
 551
 552         for (i=0; i<count; i++) {
 553                 if (brl_conflict_other(&locks[i], &lock)) {
 554                         free(dbuf.dptr);
 555                         return NT_STATUS_FILE_LOCK_CONFLICT;
 556                 }
 557         }
 558
 559         free(dbuf.dptr);
 560         return NT_STATUS_OK;
 561 }
 562
 563
 564 /*
 565  Remove any locks associated with a open file.
 566 */
 567 NTSTATUS brl_close(void *brl_ctx,
 568                    DATA_BLOB *file_key, int fnum)
 569 {
 570         struct brl_context *brl = brl_ctx;
 571         TDB_DATA kbuf, dbuf;
 572         int count, i, dcount=0;
 573         struct lock_struct *locks;
 574         NTSTATUS status;
 575
 576         kbuf.dptr = file_key->data;
 577         kbuf.dsize = file_key->length;
 578
 579         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 580                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 581         }
 582
 583         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 584         if (!dbuf.dptr) {
 585                 tdb_chainunlock(brl->w->tdb, kbuf);
 586                 return NT_STATUS_OK;
 587         }
 588
 589         /* there are existing locks - remove any for this fnum */
 590         locks = (struct lock_struct *)dbuf.dptr;
 591         count = dbuf.dsize / sizeof(*locks);
 592
 593         for (i=0; i<count; i++) {
 594                 struct lock_struct *lock = &locks[i];
 595
 596                 if (lock->context.tid == brl->tid &&
 597                     lock->context.server == brl->server &&
 598                     lock->fnum == fnum) {
 599                         /* found it - delete it */
 600                         if (count > 1 && i < count-1) {
 601                                 memmove(&locks[i], &locks[i+1],
 602                                         sizeof(*locks)*((count-1) - i));
 603                         }
 604                         count--;
 605                         i--;
 606                         dcount++;
 607                 }
 608         }
 609
 610         status = NT_STATUS_OK;
 611
 612         if (count == 0) {
 613                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 614                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 615                 }
 616         } else if (dcount != 0) {
 617                 /* tell all pending lock holders for this file that
 618                    they have a chance now. This is a bit indiscriminant,
 619                    but works OK */
 620                 brl_notify_all(brl, locks, count);
 621
 622                 dbuf.dsize = count * sizeof(*locks);
 623
 624                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 625                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 626                 }
 627         }
 628
 629         free(dbuf.dptr);
 630         tdb_chainunlock(brl->w->tdb, kbuf);
 631
 632         return status;
 633 }
 634