source4/ntvfs/common/brlock.c

   1 /*
   2    Unix SMB/CIFS implementation.
   3
   4    generic byte range locking code
   5
   6    Copyright (C) Andrew Tridgell 1992-2004
   7    Copyright (C) Jeremy Allison 1992-2000
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 2 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 */
  23
  24 /* This module implements a tdb based byte range locking service,
  25    replacing the fcntl() based byte range locking previously
  26    used. This allows us to provide the same semantics as NT */
  27
  28 #include "includes.h"
  29 #include "messages.h"
  30
  31 /*
  32   in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  33   a file. For a local posix filesystem this will usually be a combination
  34   of the device and inode numbers of the file, but it can be anything
   35   that uniquely identifies a file for locking purposes, as long
  36   as it is applied consistently.
  37 */
  38
  39 /*
  40   the lock context contains the elements that define whether one
  41   lock is the same as another lock
  42 */
  43 struct lock_context {
  44         servid_t server;
  45         uint16_t smbpid;
  46         uint16_t tid;
  47 };
  48
  49 /* The data in brlock records is an unsorted linear array of these
  50    records.  It is unnecessary to store the count as tdb provides the
  51    size of the record */
  52 struct lock_struct {
  53         struct lock_context context;
  54         uint64_t start;
  55         uint64_t size;
  56         uint16_t fnum;
  57         enum brl_type lock_type;
  58         void *notify_ptr;
  59 };
  60
  61 struct brl_context {
  62         struct tdb_wrap *w;
  63         servid_t server;
  64         uint16_t tid;
  65         struct messaging_context *messaging_ctx;
  66         struct lock_struct last_lock_failure;
  67 };
  68
  69
  70 /*
  71   Open up the brlock.tdb database. Close it down using
  72   talloc_free(). We need the messaging_ctx to allow for
  73   pending lock notifications.
  74 */
  75 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid,
  76                              struct messaging_context *messaging_ctx)
  77 {
  78         char *path;
  79         struct brl_context *brl;
  80
  81         brl = talloc_p(mem_ctx, struct brl_context);
  82         if (brl == NULL) {
  83                 return NULL;
  84         }
  85
  86         path = smbd_tmp_path(brl, "brlock.tdb");
  87         brl->w = tdb_wrap_open(brl, path, 0,
  88                                TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
  89         talloc_free(path);
  90         if (brl->w == NULL) {
  91                 talloc_free(brl);
  92                 return NULL;
  93         }
  94
  95         brl->server = server;
  96         brl->tid = tid;
  97         brl->messaging_ctx = messaging_ctx;
  98         ZERO_STRUCT(brl->last_lock_failure);
  99
 100         return brl;
 101 }
 102
 103
 104 /*
 105   see if two locking contexts are equal
 106 */
 107 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
 108 {
 109         return (ctx1->server == ctx2->server &&
 110                 ctx1->smbpid == ctx2->smbpid &&
 111                 ctx1->tid == ctx2->tid);
 112 }
 113
 114 /*
 115   see if lck1 and lck2 overlap
 116 */
 117 static BOOL brl_overlap(struct lock_struct *lck1,
 118                         struct lock_struct *lck2)
 119 {
 120         /* this extra check is not redundent - it copes with locks
 121            that go beyond the end of 64 bit file space */
 122         if (lck1->size != 0 &&
 123             lck1->start == lck2->start &&
 124             lck1->size == lck2->size) {
 125                 return True;
 126         }
 127
 128         if (lck1->start >= (lck2->start+lck2->size) ||
 129             lck2->start >= (lck1->start+lck1->size)) {
 130                 return False;
 131         }
 132         return True;
 133 }
 134
 135 /*
 136  See if lock2 can be added when lock1 is in place.
 137 */
 138 static BOOL brl_conflict(struct lock_struct *lck1,
 139                          struct lock_struct *lck2)
 140 {
 141         /* pending locks don't conflict with anything */
 142         if (lck1->lock_type >= PENDING_READ_LOCK ||
 143             lck2->lock_type >= PENDING_READ_LOCK) {
 144                 return False;
 145         }
 146
 147         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
 148                 return False;
 149         }
 150
 151         if (brl_same_context(&lck1->context, &lck2->context) &&
 152             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
 153                 return False;
 154         }
 155
 156         return brl_overlap(lck1, lck2);
 157 }
 158
 159
 160 /*
 161  Check to see if this lock conflicts, but ignore our own locks on the
 162  same fnum only.
 163 */
 164 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
 165 {
 166         /* pending locks don't conflict with anything */
 167         if (lck1->lock_type >= PENDING_READ_LOCK ||
 168             lck2->lock_type >= PENDING_READ_LOCK) {
 169                 return False;
 170         }
 171
 172         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
 173                 return False;
 174
 175         /*
 176          * note that incoming write calls conflict with existing READ
 177          * locks even if the context is the same. JRA. See LOCKTEST7
 178          * in smbtorture.
 179          */
 180         if (brl_same_context(&lck1->context, &lck2->context) &&
 181             lck1->fnum == lck2->fnum &&
 182             (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
 183                 return False;
 184         }
 185
 186         return brl_overlap(lck1, lck2);
 187 }
 188
 189
 190 /*
 191   amazingly enough, w2k3 "remembers" whether the last lock failure
 192   is the same as this one and changes its error code. I wonder if any
 193   app depends on this?
 194 */
 195 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
 196 {
 197         if (brl_same_context(&lock->context, &brl->last_lock_failure.context) &&
 198             lock->fnum == brl->last_lock_failure.fnum &&
 199             lock->start == brl->last_lock_failure.start &&
 200             lock->size == brl->last_lock_failure.size) {
 201                 return NT_STATUS_FILE_LOCK_CONFLICT;
 202         }
 203         brl->last_lock_failure = *lock;
 204         if (lock->start >= 0xEF000000 &&
 205             (lock->start >> 63) == 0) {
 206                 /* amazing the little things you learn with a test
 207                    suite. Locks beyond this offset (as a 64 bit
 208                    number!) always generate the conflict error code,
 209                    unless the top bit is set */
 210                 return NT_STATUS_FILE_LOCK_CONFLICT;
 211         }
 212         return NT_STATUS_LOCK_NOT_GRANTED;
 213 }
 214
 215 /*
 216   Lock a range of bytes.  The lock_type can be a PENDING_*_LOCK, in
 217   which case a real lock is first tried, and if that fails then a
 218   pending lock is created. When the pending lock is triggered (by
 219   someone else closing an overlapping lock range) a messaging
 220   notification is sent, identified by the notify_ptr
 221 */
 222 NTSTATUS brl_lock(struct brl_context *brl,
 223                   DATA_BLOB *file_key,
 224                   uint16_t smbpid,
 225                   uint16_t fnum,
 226                   uint64_t start, uint64_t size,
 227                   enum brl_type lock_type,
 228                   void *notify_ptr)
 229 {
 230         TDB_DATA kbuf, dbuf;
 231         int count, i;
 232         struct lock_struct lock, *locks;
 233         char *tp;
 234         NTSTATUS status;
 235
 236         kbuf.dptr = file_key->data;
 237         kbuf.dsize = file_key->length;
 238
 239         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 240                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 241         }
 242
 243         /* if this is a pending lock, then with the chainlock held we
 244            try to get the real lock. If we succeed then we don't need
 245            to make it pending. This prevents a possible race condition
 246            where the pending lock gets created after the lock that is
 247            preventing the real lock gets removed */
 248         if (lock_type >= PENDING_READ_LOCK) {
 249                 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
 250                 status = brl_lock(brl, file_key, smbpid, fnum, start, size, rw, NULL);
 251                 if (NT_STATUS_IS_OK(status)) {
 252                         tdb_chainunlock(brl->w->tdb, kbuf);
 253                         return NT_STATUS_OK;
 254                 }
 255         }
 256
 257         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 258
 259         lock.context.smbpid = smbpid;
 260         lock.context.server = brl->server;
 261         lock.context.tid = brl->tid;
 262         lock.start = start;
 263         lock.size = size;
 264         lock.fnum = fnum;
 265         lock.lock_type = lock_type;
 266         lock.notify_ptr = notify_ptr;
 267
 268         if (dbuf.dptr) {
 269                 /* there are existing locks - make sure they don't conflict */
 270                 locks = (struct lock_struct *)dbuf.dptr;
 271                 count = dbuf.dsize / sizeof(*locks);
 272                 for (i=0; i<count; i++) {
 273                         if (brl_conflict(&locks[i], &lock)) {
 274                                 status = brl_lock_failed(brl, &lock);
 275                                 goto fail;
 276                         }
 277                 }
 278         }
 279
 280         /* no conflicts - add it to the list of locks */
 281         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*locks));
 282         if (!tp) {
 283                 status = NT_STATUS_NO_MEMORY;
 284                 goto fail;
 285         } else {
 286                 dbuf.dptr = tp;
 287         }
 288         memcpy(dbuf.dptr + dbuf.dsize, &lock, sizeof(lock));
 289         dbuf.dsize += sizeof(lock);
 290
 291         if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 292                 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 293                 goto fail;
 294         }
 295
 296         free(dbuf.dptr);
 297         tdb_chainunlock(brl->w->tdb, kbuf);
 298
 299         /* the caller needs to know if the real lock was granted. If
 300            we have reached here then it must be a pending lock that
 301            was granted, so tell them the lock failed */
 302         if (lock_type >= PENDING_READ_LOCK) {
 303                 return brl_lock_failed(brl, &lock);
 304         }
 305
 306         return NT_STATUS_OK;
 307
 308  fail:
 309
 310         free(dbuf.dptr);
 311         tdb_chainunlock(brl->w->tdb, kbuf);
 312         return status;
 313 }
 314
 315
 316 /*
 317   we are removing a lock that might be holding up a pending lock. Scan for pending
 318   locks that cover this range and if we find any then notify the server that it should
 319   retry the lock
 320 */
 321 static void brl_notify_unlock(struct brl_context *brl,
 322                               struct lock_struct *locks, int count,
 323                               struct lock_struct *removed_lock)
 324 {
 325         int i, last_notice;
 326
 327         /* the last_notice logic is to prevent stampeding on a lock
 328            range. It prevents us sending hundreds of notifies on the
 329            same range of bytes. It doesn't prevent all possible
 330            stampedes, but it does prevent the most common problem */
 331         last_notice = -1;
 332
 333         for (i=0;i<count;i++) {
 334                 if (locks[i].lock_type >= PENDING_READ_LOCK &&
 335                     brl_overlap(&locks[i], removed_lock)) {
 336                         DATA_BLOB data;
 337
 338                         if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
 339                                 continue;
 340                         }
 341                         if (locks[i].lock_type == PENDING_WRITE_LOCK) {
 342                                 last_notice = i;
 343                         }
 344                         data.data = (void *)&locks[i].notify_ptr;
 345                         data.length = sizeof(void *);
 346                         messaging_send(brl->messaging_ctx, locks[i].context.server, MSG_BRL_RETRY, &data);
 347                 }
 348         }
 349 }
 350
 351
 352 /*
 353   send notifications for all pending locks - the file is being closed by this
 354   user
 355 */
 356 static void brl_notify_all(struct brl_context *brl,
 357                            struct lock_struct *locks, int count)
 358 {
 359         int i;
 360         for (i=0;i<count;i++) {
 361                 if (locks->lock_type >= PENDING_READ_LOCK) {
 362                         brl_notify_unlock(brl, locks, count, &locks[i]);
 363                 }
 364         }
 365 }
 366
 367
 368
 369 /*
 370  Unlock a range of bytes.
 371 */
 372 NTSTATUS brl_unlock(struct brl_context *brl,
 373                     DATA_BLOB *file_key,
 374                     uint16_t smbpid,
 375                     uint16_t fnum,
 376                     uint64_t start, uint64_t size)
 377 {
 378         TDB_DATA kbuf, dbuf;
 379         int count, i;
 380         struct lock_struct *locks;
 381         struct lock_context context;
 382         NTSTATUS status;
 383
 384         kbuf.dptr = file_key->data;
 385         kbuf.dsize = file_key->length;
 386
 387         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 388                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 389         }
 390
 391         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 392         if (!dbuf.dptr) {
 393                 tdb_chainunlock(brl->w->tdb, kbuf);
 394                 return NT_STATUS_RANGE_NOT_LOCKED;
 395         }
 396
 397         context.smbpid = smbpid;
 398         context.server = brl->server;
 399         context.tid = brl->tid;
 400
 401         /* there are existing locks - find a match */
 402         locks = (struct lock_struct *)dbuf.dptr;
 403         count = dbuf.dsize / sizeof(*locks);
 404
 405         for (i=0; i<count; i++) {
 406                 struct lock_struct *lock = &locks[i];
 407
 408                 if (brl_same_context(&lock->context, &context) &&
 409                     lock->fnum == fnum &&
 410                     lock->start == start &&
 411                     lock->size == size &&
 412                     lock->notify_ptr == NULL) {
 413                         /* found it - delete it */
 414                         if (count == 1) {
 415                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 416                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 417                                         goto fail;
 418                                 }
 419                         } else {
 420                                 struct lock_struct removed_lock = *lock;
 421                                 if (i < count-1) {
 422                                         memmove(&locks[i], &locks[i+1],
 423                                                 sizeof(*locks)*((count-1) - i));
 424                                 }
 425                                 count--;
 426
 427                                 /* send notifications for any relevant pending locks */
 428                                 brl_notify_unlock(brl, locks, count, &removed_lock);
 429
 430                                 dbuf.dsize = count * sizeof(*locks);
 431
 432                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 433                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 434                                         goto fail;
 435                                 }
 436                         }
 437
 438                         free(dbuf.dptr);
 439                         tdb_chainunlock(brl->w->tdb, kbuf);
 440                         return NT_STATUS_OK;
 441                 }
 442         }
 443
 444         /* we didn't find it */
 445         status = NT_STATUS_RANGE_NOT_LOCKED;
 446
 447  fail:
 448         free(dbuf.dptr);
 449         tdb_chainunlock(brl->w->tdb, kbuf);
 450         return status;
 451 }
 452
 453
 454 /*
 455   remove a pending lock. This is called when the caller has either
 456   given up trying to establish a lock or when they have succeeded in
 457   getting it. In either case they no longer need to be notified.
 458 */
 459 NTSTATUS brl_remove_pending(struct brl_context *brl,
 460                             DATA_BLOB *file_key,
 461                             void *notify_ptr)
 462 {
 463         TDB_DATA kbuf, dbuf;
 464         int count, i;
 465         struct lock_struct *locks;
 466         NTSTATUS status;
 467
 468         kbuf.dptr = file_key->data;
 469         kbuf.dsize = file_key->length;
 470
 471         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 472                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 473         }
 474
 475         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 476         if (!dbuf.dptr) {
 477                 tdb_chainunlock(brl->w->tdb, kbuf);
 478                 return NT_STATUS_RANGE_NOT_LOCKED;
 479         }
 480
 481         /* there are existing locks - find a match */
 482         locks = (struct lock_struct *)dbuf.dptr;
 483         count = dbuf.dsize / sizeof(*locks);
 484
 485         for (i=0; i<count; i++) {
 486                 struct lock_struct *lock = &locks[i];
 487
 488                 if (lock->notify_ptr == notify_ptr &&
 489                     lock->context.server == brl->server) {
 490                         /* found it - delete it */
 491                         if (count == 1) {
 492                                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 493                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 494                                         goto fail;
 495                                 }
 496                         } else {
 497                                 if (i < count-1) {
 498                                         memmove(&locks[i], &locks[i+1],
 499                                                 sizeof(*locks)*((count-1) - i));
 500                                 }
 501                                 count--;
 502                                 dbuf.dsize = count * sizeof(*locks);
 503                                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 504                                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 505                                         goto fail;
 506                                 }
 507                         }
 508
 509                         free(dbuf.dptr);
 510                         tdb_chainunlock(brl->w->tdb, kbuf);
 511                         return NT_STATUS_OK;
 512                 }
 513         }
 514
 515         /* we didn't find it */
 516         status = NT_STATUS_RANGE_NOT_LOCKED;
 517
 518  fail:
 519         free(dbuf.dptr);
 520         tdb_chainunlock(brl->w->tdb, kbuf);
 521         return status;
 522 }
 523
 524
 525 /*
 526   Test if we are allowed to perform IO on a region of an open file
 527 */
 528 NTSTATUS brl_locktest(struct brl_context *brl,
 529                       DATA_BLOB *file_key,
 530                       uint16_t fnum,
 531                       uint16 smbpid,
 532                       uint64_t start, uint64_t size,
 533                       enum brl_type lock_type)
 534 {
 535         TDB_DATA kbuf, dbuf;
 536         int count, i;
 537         struct lock_struct lock, *locks;
 538
 539         kbuf.dptr = file_key->data;
 540         kbuf.dsize = file_key->length;
 541
 542         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 543         if (dbuf.dptr == NULL) {
 544                 return NT_STATUS_OK;
 545         }
 546
 547         lock.context.smbpid = smbpid;
 548         lock.context.server = brl->server;
 549         lock.context.tid = brl->tid;
 550         lock.start = start;
 551         lock.size = size;
 552         lock.fnum = fnum;
 553         lock.lock_type = lock_type;
 554
 555         /* there are existing locks - make sure they don't conflict */
 556         locks = (struct lock_struct *)dbuf.dptr;
 557         count = dbuf.dsize / sizeof(*locks);
 558
 559         for (i=0; i<count; i++) {
 560                 if (brl_conflict_other(&locks[i], &lock)) {
 561                         free(dbuf.dptr);
 562                         return NT_STATUS_FILE_LOCK_CONFLICT;
 563                 }
 564         }
 565
 566         free(dbuf.dptr);
 567         return NT_STATUS_OK;
 568 }
 569
 570
 571 /*
 572  Remove any locks associated with a open file.
 573 */
 574 NTSTATUS brl_close(struct brl_context *brl,
 575                    DATA_BLOB *file_key, int fnum)
 576 {
 577         TDB_DATA kbuf, dbuf;
 578         int count, i, dcount=0;
 579         struct lock_struct *locks;
 580         NTSTATUS status;
 581
 582         kbuf.dptr = file_key->data;
 583         kbuf.dsize = file_key->length;
 584
 585         if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
 586                 return NT_STATUS_INTERNAL_DB_CORRUPTION;
 587         }
 588
 589         dbuf = tdb_fetch(brl->w->tdb, kbuf);
 590         if (!dbuf.dptr) {
 591                 tdb_chainunlock(brl->w->tdb, kbuf);
 592                 return NT_STATUS_OK;
 593         }
 594
 595         /* there are existing locks - remove any for this fnum */
 596         locks = (struct lock_struct *)dbuf.dptr;
 597         count = dbuf.dsize / sizeof(*locks);
 598
 599         for (i=0; i<count; i++) {
 600                 struct lock_struct *lock = &locks[i];
 601
 602                 if (lock->context.tid == brl->tid &&
 603                     lock->context.server == brl->server &&
 604                     lock->fnum == fnum) {
 605                         /* found it - delete it */
 606                         if (count > 1 && i < count-1) {
 607                                 memmove(&locks[i], &locks[i+1],
 608                                         sizeof(*locks)*((count-1) - i));
 609                         }
 610                         count--;
 611                         i--;
 612                         dcount++;
 613                 }
 614         }
 615
 616         status = NT_STATUS_OK;
 617
 618         if (count == 0) {
 619                 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
 620                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 621                 }
 622         } else if (dcount != 0) {
 623                 /* tell all pending lock holders for this file that
 624                    they have a chance now. This is a bit indiscriminant,
 625                    but works OK */
 626                 brl_notify_all(brl, locks, count);
 627
 628                 dbuf.dsize = count * sizeof(*locks);
 629
 630                 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 631                         status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 632                 }
 633         }
 634
 635         free(dbuf.dptr);
 636         tdb_chainunlock(brl->w->tdb, kbuf);
 637
 638         return status;
 639 }
 640