2 Unix SMB/CIFS implementation.
4 generic byte range locking code
6 Copyright (C) Andrew Tridgell 1992-2004
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 /* This module implements a tdb based byte range locking service,
25 replacing the fcntl() based byte range locking previously
26 used. This allows us to provide the same semantics as NT */
29 #include "system/filesys.h"
30 #include "lib/tdb/include/tdb.h"
31 #include "messaging/messaging.h"
33 #include "lib/messaging/irpc.h"
34 #include "libcli/libcli.h"
37 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
38 a file. For a local posix filesystem this will usually be a combination
39 of the device and inode numbers of the file, but it can be anything
40 that uniquely identifies a file for locking purposes, as long
41 as it is applied consistently.
/* NOTE(review): this region is a mangled listing -- the `struct ... {`
   opener lines, closing braces and several field declarations are
   missing from the extract, so the fragments below are kept
   byte-for-byte with annotations only. */
46 the lock context contains the elements that define whether one
47 lock is the same as another lock
/* fragment of struct lock_context -- identifies a lock's owner;
   brl_same_context() compares server, smbpid and this ctx pointer */
52 struct brl_context *ctx;
55 /* The data in brlock records is an unsorted linear array of these
56 records. It is unnecessary to store the count as tdb provides the
/* fragment of struct lock_struct -- one byte-range lock record as
   stored in brlock.tdb; the start/size/notify_ptr field lines are not
   visible in this extract but are used by the functions below */
59 struct lock_context context;
60 struct ntvfs_handle *ntvfs;
63 enum brl_type lock_type;
67 /* this struct is attached to an open file handle */
/* fragment of struct brl_handle -- per-open-file state; the DATA_BLOB
   key field (used as the tdb key) is presumably on a missing line */
70 struct ntvfs_handle *ntvfs;
71 struct lock_struct last_lock;
74 /* this struct is typically attached to tcon */
/* fragment of struct brl_context -- per-connection locking state; the
   tdb_wrap *w and server id fields are presumably on missing lines */
78 struct messaging_context *messaging_ctx;
82 Open up the brlock.tdb database. Close it down using
83 talloc_free(). We need the messaging_ctx to allow for
84 pending lock notifications.
86 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, uint32_t server,
87 struct messaging_context *messaging_ctx)
90 struct brl_context *brl;
92 brl = talloc(mem_ctx, struct brl_context);
97 path = smbd_tmp_path(brl, "brlock.tdb");
98 brl->w = tdb_wrap_open(brl, path, 0,
99 TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
101 if (brl->w == NULL) {
106 brl->server = server;
107 brl->messaging_ctx = messaging_ctx;
112 struct brl_handle *brl_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs, DATA_BLOB *file_key)
114 struct brl_handle *brlh;
116 brlh = talloc(mem_ctx, struct brl_handle);
121 brlh->key = *file_key;
123 ZERO_STRUCT(brlh->last_lock);
129 see if two locking contexts are equal
131 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
133 return (ctx1->server == ctx2->server &&
134 ctx1->smbpid == ctx2->smbpid &&
135 ctx1->ctx == ctx2->ctx);
139 see if lck1 and lck2 overlap
141 static BOOL brl_overlap(struct lock_struct *lck1,
142 struct lock_struct *lck2)
144 /* this extra check is not redundent - it copes with locks
145 that go beyond the end of 64 bit file space */
146 if (lck1->size != 0 &&
147 lck1->start == lck2->start &&
148 lck1->size == lck2->size) {
152 if (lck1->start >= (lck2->start+lck2->size) ||
153 lck2->start >= (lck1->start+lck1->size)) {
160 See if lock2 can be added when lock1 is in place.
162 static BOOL brl_conflict(struct lock_struct *lck1,
163 struct lock_struct *lck2)
165 /* pending locks don't conflict with anything */
166 if (lck1->lock_type >= PENDING_READ_LOCK ||
167 lck2->lock_type >= PENDING_READ_LOCK) {
171 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
175 if (brl_same_context(&lck1->context, &lck2->context) &&
176 lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
180 return brl_overlap(lck1, lck2);
185 Check to see if this lock conflicts, but ignore our own locks on the
188 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
190 /* pending locks don't conflict with anything */
191 if (lck1->lock_type >= PENDING_READ_LOCK ||
192 lck2->lock_type >= PENDING_READ_LOCK) {
196 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
200 * note that incoming write calls conflict with existing READ
201 * locks even if the context is the same. JRA. See LOCKTEST7
204 if (brl_same_context(&lck1->context, &lck2->context) &&
205 lck1->ntvfs == lck2->ntvfs &&
206 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
210 return brl_overlap(lck1, lck2);
215 amazingly enough, w2k3 "remembers" whether the last lock failure
216 is the same as this one and changes its error code. I wonder if any
219 static NTSTATUS brl_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
222 * this function is only called for non pending lock!
226 * if the notify_ptr is non NULL,
227 * it means that we're at the end of a pending lock
228 * and the real lock is requested after the timout went by
229 * In this case we need to remember the last_lock and always
230 * give FILE_LOCK_CONFLICT
232 if (lock->notify_ptr) {
233 brlh->last_lock = *lock;
234 return NT_STATUS_FILE_LOCK_CONFLICT;
238 * amazing the little things you learn with a test
239 * suite. Locks beyond this offset (as a 64 bit
240 * number!) always generate the conflict error code,
241 * unless the top bit is set
243 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
244 brlh->last_lock = *lock;
245 return NT_STATUS_FILE_LOCK_CONFLICT;
249 * if the current lock matches the last failed lock on the file handle
250 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
252 if (lock->context.server == brlh->last_lock.context.server &&
253 lock->context.ctx == brlh->last_lock.context.ctx &&
254 lock->ntvfs == brlh->last_lock.ntvfs &&
255 lock->start == brlh->last_lock.start) {
256 return NT_STATUS_FILE_LOCK_CONFLICT;
259 brlh->last_lock = *lock;
260 return NT_STATUS_LOCK_NOT_GRANTED;
264 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
265 which case a real lock is first tried, and if that fails then a
266 pending lock is created. When the pending lock is triggered (by
267 someone else closing an overlapping lock range) a messaging
268 notification is sent, identified by the notify_ptr
270 NTSTATUS brl_lock(struct brl_context *brl,
271 struct brl_handle *brlh,
273 uint64_t start, uint64_t size,
274 enum brl_type lock_type,
279 struct lock_struct lock, *locks=NULL;
282 kbuf.dptr = brlh->key.data;
283 kbuf.dsize = brlh->key.length;
285 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
286 return NT_STATUS_INTERNAL_DB_CORRUPTION;
289 /* if this is a pending lock, then with the chainlock held we
290 try to get the real lock. If we succeed then we don't need
291 to make it pending. This prevents a possible race condition
292 where the pending lock gets created after the lock that is
293 preventing the real lock gets removed */
294 if (lock_type >= PENDING_READ_LOCK) {
295 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
297 /* here we need to force that the last_lock isn't overwritten */
298 lock = brlh->last_lock;
299 status = brl_lock(brl, brlh, smbpid, start, size, rw, NULL);
300 brlh->last_lock = lock;
302 if (NT_STATUS_IS_OK(status)) {
303 tdb_chainunlock(brl->w->tdb, kbuf);
308 dbuf = tdb_fetch(brl->w->tdb, kbuf);
310 lock.context.smbpid = smbpid;
311 lock.context.server = brl->server;
312 lock.context.ctx = brl;
313 lock.ntvfs = brlh->ntvfs;
314 lock.context.ctx = brl;
317 lock.lock_type = lock_type;
318 lock.notify_ptr = notify_ptr;
321 /* there are existing locks - make sure they don't conflict */
322 locks = (struct lock_struct *)dbuf.dptr;
323 count = dbuf.dsize / sizeof(*locks);
324 for (i=0; i<count; i++) {
325 if (brl_conflict(&locks[i], &lock)) {
326 status = brl_lock_failed(brlh, &lock);
332 /* no conflicts - add it to the list of locks */
333 locks = realloc_p(locks, struct lock_struct, count+1);
335 status = NT_STATUS_NO_MEMORY;
338 dbuf.dptr = (uint8_t *)locks;
341 dbuf.dsize += sizeof(lock);
343 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
344 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
349 tdb_chainunlock(brl->w->tdb, kbuf);
351 /* the caller needs to know if the real lock was granted. If
352 we have reached here then it must be a pending lock that
353 was granted, so tell them the lock failed */
354 if (lock_type >= PENDING_READ_LOCK) {
355 return NT_STATUS_LOCK_NOT_GRANTED;
363 tdb_chainunlock(brl->w->tdb, kbuf);
369 we are removing a lock that might be holding up a pending lock. Scan for pending
370 locks that cover this range and if we find any then notify the server that it should
373 static void brl_notify_unlock(struct brl_context *brl,
374 struct lock_struct *locks, int count,
375 struct lock_struct *removed_lock)
379 /* the last_notice logic is to prevent stampeding on a lock
380 range. It prevents us sending hundreds of notifies on the
381 same range of bytes. It doesn't prevent all possible
382 stampedes, but it does prevent the most common problem */
385 for (i=0;i<count;i++) {
386 if (locks[i].lock_type >= PENDING_READ_LOCK &&
387 brl_overlap(&locks[i], removed_lock)) {
388 if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
391 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
394 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
395 MSG_BRL_RETRY, locks[i].notify_ptr);
402 send notifications for all pending locks - the file is being closed by this
405 static void brl_notify_all(struct brl_context *brl,
406 struct lock_struct *locks, int count)
409 for (i=0;i<count;i++) {
410 if (locks->lock_type >= PENDING_READ_LOCK) {
411 brl_notify_unlock(brl, locks, count, &locks[i]);
/* NOTE(review): mangled listing -- the smbpid parameter line, local
   declarations (kbuf/dbuf/count/i/status), closing braces, the branch
   choosing whole-record tdb_delete vs. shrink-and-store, the count
   decrement and the final success/fail exits are missing from this
   extract. Lines are kept byte-for-byte; annotations only. */
419 Unlock a range of bytes.
421 NTSTATUS brl_unlock(struct brl_context *brl,
422 struct brl_handle *brlh,
424 uint64_t start, uint64_t size)
428 struct lock_struct *locks;
429 struct lock_context context;
/* the tdb record is keyed by the handle's file key blob */
432 kbuf.dptr = brlh->key.data;
433 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
435 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
436 return NT_STATUS_INTERNAL_DB_CORRUPTION;
/* no record means no locks exist on this file at all */
439 dbuf = tdb_fetch(brl->w->tdb, kbuf);
441 tdb_chainunlock(brl->w->tdb, kbuf);
442 return NT_STATUS_RANGE_NOT_LOCKED;
/* build the owner context to match against; context.ctx is
   presumably assigned on a missing line -- TODO confirm */
445 context.smbpid = smbpid;
446 context.server = brl->server;
449 /* there are existing locks - find a match */
450 locks = (struct lock_struct *)dbuf.dptr;
451 count = dbuf.dsize / sizeof(*locks);
453 for (i=0; i<count; i++) {
454 struct lock_struct *lock = &locks[i];
/* match requires same owner, same open handle, exact range, and a
   real (non-pending) lock */
456 if (brl_same_context(&lock->context, &context) &&
457 lock->ntvfs == brlh->ntvfs &&
458 lock->start == start &&
459 lock->size == size &&
460 lock->lock_type < PENDING_READ_LOCK) {
461 /* found it - delete it */
/* presumably the delete path for the last remaining lock: the whole
   tdb record is removed -- the guarding condition is on a missing line */
463 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
464 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* otherwise: remember the removed lock, then close the gap in the
   in-memory array */
468 struct lock_struct removed_lock = *lock;
470 memmove(&locks[i], &locks[i+1],
471 sizeof(*locks)*((count-1) - i));
475 /* send notifications for any relevant pending locks */
476 brl_notify_unlock(brl, locks, count, &removed_lock);
/* store the shrunk array back; count was presumably decremented on a
   missing line before this point -- TODO confirm */
478 dbuf.dsize = count * sizeof(*locks);
480 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
481 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* success exit: release the chain lock */
487 tdb_chainunlock(brl->w->tdb, kbuf);
492 /* we didn't find it */
493 status = NT_STATUS_RANGE_NOT_LOCKED;
/* shared failure exit: release the chain lock and return status */
497 tdb_chainunlock(brl->w->tdb, kbuf);
/* NOTE(review): mangled listing -- the notify_ptr parameter line,
   local declarations, closing braces, the single-record delete
   guard, the count decrement and the final exits are missing from
   this extract. Lines kept byte-for-byte; annotations only. */
503 remove a pending lock. This is called when the caller has either
504 given up trying to establish a lock or when they have succeeded in
505 getting it. In either case they no longer need to be notified.
507 NTSTATUS brl_remove_pending(struct brl_context *brl,
508 struct brl_handle *brlh,
513 struct lock_struct *locks;
/* the tdb record is keyed by the handle's file key blob */
516 kbuf.dptr = brlh->key.data;
517 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
519 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
520 return NT_STATUS_INTERNAL_DB_CORRUPTION;
/* no record means there is nothing pending to remove */
523 dbuf = tdb_fetch(brl->w->tdb, kbuf);
525 tdb_chainunlock(brl->w->tdb, kbuf);
526 return NT_STATUS_RANGE_NOT_LOCKED;
529 /* there are existing locks - find a match */
530 locks = (struct lock_struct *)dbuf.dptr;
531 count = dbuf.dsize / sizeof(*locks);
533 for (i=0; i<count; i++) {
534 struct lock_struct *lock = &locks[i];
/* a pending lock is identified by its notify_ptr plus the server
   that created it */
536 if (lock->lock_type >= PENDING_READ_LOCK &&
537 lock->notify_ptr == notify_ptr &&
538 lock->context.server == brl->server) {
539 /* found it - delete it */
/* presumably the delete path for the last remaining lock -- the
   guarding condition is on a missing line */
541 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
542 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* otherwise close the gap in the in-memory array */
547 memmove(&locks[i], &locks[i+1],
548 sizeof(*locks)*((count-1) - i));
/* store the shrunk array back; count presumably decremented on a
   missing line -- TODO confirm */
551 dbuf.dsize = count * sizeof(*locks);
552 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
553 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* success exit: release the chain lock */
559 tdb_chainunlock(brl->w->tdb, kbuf);
564 /* we didn't find it */
565 status = NT_STATUS_RANGE_NOT_LOCKED;
/* shared failure exit: release the chain lock and return status */
569 tdb_chainunlock(brl->w->tdb, kbuf);
575 Test if we are allowed to perform IO on a region of an open file
577 NTSTATUS brl_locktest(struct brl_context *brl,
578 struct brl_handle *brlh,
580 uint64_t start, uint64_t size,
581 enum brl_type lock_type)
585 struct lock_struct lock, *locks;
587 kbuf.dptr = brlh->key.data;
588 kbuf.dsize = brlh->key.length;
590 dbuf = tdb_fetch(brl->w->tdb, kbuf);
591 if (dbuf.dptr == NULL) {
595 lock.context.smbpid = smbpid;
596 lock.context.server = brl->server;
597 lock.context.ctx = brl;
598 lock.ntvfs = brlh->ntvfs;
601 lock.lock_type = lock_type;
603 /* there are existing locks - make sure they don't conflict */
604 locks = (struct lock_struct *)dbuf.dptr;
605 count = dbuf.dsize / sizeof(*locks);
607 for (i=0; i<count; i++) {
608 if (brl_conflict_other(&locks[i], &lock)) {
610 return NT_STATUS_FILE_LOCK_CONFLICT;
/* NOTE(review): mangled listing -- local declarations, the early
   return when no record exists, the count--/dcount++ bookkeeping
   lines, closing braces and the final return are missing from this
   extract. Lines kept byte-for-byte; annotations only. */
620 Remove any locks associated with a open file.
622 NTSTATUS brl_close(struct brl_context *brl,
623 struct brl_handle *brlh)
/* dcount presumably counts how many locks get deleted below */
626 int count, i, dcount=0;
627 struct lock_struct *locks;
/* the tdb record is keyed by the handle's file key blob */
630 kbuf.dptr = brlh->key.data;
631 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
633 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
634 return NT_STATUS_INTERNAL_DB_CORRUPTION;
/* no record means nothing to clean up (return is on a missing line) */
637 dbuf = tdb_fetch(brl->w->tdb, kbuf);
639 tdb_chainunlock(brl->w->tdb, kbuf);
643 /* there are existing locks - remove any for this fnum */
644 locks = (struct lock_struct *)dbuf.dptr;
645 count = dbuf.dsize / sizeof(*locks);
647 for (i=0; i<count; i++) {
648 struct lock_struct *lock = &locks[i];
/* every lock created via this context/server/handle is removed,
   regardless of range or type */
650 if (lock->context.ctx == brl &&
651 lock->context.server == brl->server &&
652 lock->ntvfs == brlh->ntvfs) {
653 /* found it - delete it */
654 if (count > 1 && i < count-1) {
655 memmove(&locks[i], &locks[i+1],
656 sizeof(*locks)*((count-1) - i));
/* count--, i--, dcount++ presumably follow on missing lines so the
   shifted element is re-examined -- TODO confirm */
664 status = NT_STATUS_OK;
/* if no locks remain the whole record is deleted (the count==0 guard
   is presumably on a missing line) */
667 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
668 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
670 } else if (dcount != 0) {
671 /* tell all pending lock holders for this file that
672 they have a chance now. This is a bit indiscriminant,
/* ...but becoming more accurate would be quite expensive (original
   comment continues on a missing line) */
674 brl_notify_all(brl, locks, count);
676 dbuf.dsize = count * sizeof(*locks);
678 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
679 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* release the chain lock and return status (return on a missing line) */
684 tdb_chainunlock(brl->w->tdb, kbuf);