2 Unix SMB/CIFS implementation.
4 generic byte range locking code - tdb backend
6 Copyright (C) Andrew Tridgell 1992-2006
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 /* This module implements a tdb based byte range locking service,
25 replacing the fcntl() based byte range locking previously
26 used. This allows us to provide the same semantics as NT */
29 #include "system/filesys.h"
30 #include "lib/tdb/include/tdb.h"
31 #include "messaging/messaging.h"
33 #include "lib/messaging/irpc.h"
34 #include "libcli/libcli.h"
35 #include "cluster/cluster.h"
36 #include "ntvfs/common/brlock.h"
37 #include "ntvfs/ntvfs.h"
40 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
41 a file. For a local posix filesystem this will usually be a combination
42 of the device and inode numbers of the file, but it can be anything
43 that uniquely identifies a file for locking purposes, as long
44 as it is applied consistently.
47 /* this struct is typically attached to a tcon */
50 struct server_id server;
51 struct messaging_context *messaging_ctx;
55 the lock context contains the elements that define whether one
56 lock is the same as another lock
59 struct server_id server;
61 struct brl_context *ctx;
64 /* The data in brlock records is an unsorted linear array of these
65 records. It is unnecessary to store the count as tdb provides the
68 struct lock_context context;
69 struct ntvfs_handle *ntvfs;
72 enum brl_type lock_type;
76 /* this struct is attached to an open file handle */
79 struct ntvfs_handle *ntvfs;
80 struct lock_struct last_lock;
84 Open up the brlock.tdb database. Close it down using
85 talloc_free(). We need the messaging_ctx to allow for
86 pending lock notifications.
88 static struct brl_context *brl_tdb_init(TALLOC_CTX *mem_ctx, struct server_id server,
89 struct messaging_context *messaging_ctx)
91 struct brl_context *brl;
93 brl = talloc(mem_ctx, struct brl_context);
98 brl->w = cluster_tdb_tmp_open(brl, "brlock.tdb", TDB_DEFAULT);
104 brl->server = server;
105 brl->messaging_ctx = messaging_ctx;
110 static struct brl_handle *brl_tdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
113 struct brl_handle *brlh;
115 brlh = talloc(mem_ctx, struct brl_handle);
120 brlh->key = *file_key;
122 ZERO_STRUCT(brlh->last_lock);
128 see if two locking contexts are equal
130 static BOOL brl_tdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
132 return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
133 ctx1->smbpid == ctx2->smbpid &&
134 ctx1->ctx == ctx2->ctx);
138 see if lck1 and lck2 overlap
140 static BOOL brl_tdb_overlap(struct lock_struct *lck1,
141 struct lock_struct *lck2)
143 /* this extra check is not redundent - it copes with locks
144 that go beyond the end of 64 bit file space */
145 if (lck1->size != 0 &&
146 lck1->start == lck2->start &&
147 lck1->size == lck2->size) {
151 if (lck1->start >= (lck2->start+lck2->size) ||
152 lck2->start >= (lck1->start+lck1->size)) {
159 See if lock2 can be added when lock1 is in place.
161 static BOOL brl_tdb_conflict(struct lock_struct *lck1,
162 struct lock_struct *lck2)
164 /* pending locks don't conflict with anything */
165 if (lck1->lock_type >= PENDING_READ_LOCK ||
166 lck2->lock_type >= PENDING_READ_LOCK) {
170 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
174 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
175 lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
179 return brl_tdb_overlap(lck1, lck2);
184 Check to see if this lock conflicts, but ignore our own locks on the
187 static BOOL brl_tdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
189 /* pending locks don't conflict with anything */
190 if (lck1->lock_type >= PENDING_READ_LOCK ||
191 lck2->lock_type >= PENDING_READ_LOCK) {
195 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
199 * note that incoming write calls conflict with existing READ
200 * locks even if the context is the same. JRA. See LOCKTEST7
203 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
204 lck1->ntvfs == lck2->ntvfs &&
205 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
209 return brl_tdb_overlap(lck1, lck2);
214 amazingly enough, w2k3 "remembers" whether the last lock failure
215 is the same as this one and changes its error code. I wonder if any
218 static NTSTATUS brl_tdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
221 * this function is only called for non pending lock!
224 /* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
225 if (lock->ntvfs->ctx->protocol == PROTOCOL_SMB2) {
226 return NT_STATUS_LOCK_NOT_GRANTED;
230 * if the notify_ptr is non NULL,
231 * it means that we're at the end of a pending lock
232 * and the real lock is requested after the timout went by
233 * In this case we need to remember the last_lock and always
234 * give FILE_LOCK_CONFLICT
236 if (lock->notify_ptr) {
237 brlh->last_lock = *lock;
238 return NT_STATUS_FILE_LOCK_CONFLICT;
242 * amazing the little things you learn with a test
243 * suite. Locks beyond this offset (as a 64 bit
244 * number!) always generate the conflict error code,
245 * unless the top bit is set
247 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
248 brlh->last_lock = *lock;
249 return NT_STATUS_FILE_LOCK_CONFLICT;
253 * if the current lock matches the last failed lock on the file handle
254 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
256 if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
257 lock->context.ctx == brlh->last_lock.context.ctx &&
258 lock->ntvfs == brlh->last_lock.ntvfs &&
259 lock->start == brlh->last_lock.start) {
260 return NT_STATUS_FILE_LOCK_CONFLICT;
263 brlh->last_lock = *lock;
264 return NT_STATUS_LOCK_NOT_GRANTED;
268 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
269 which case a real lock is first tried, and if that fails then a
270 pending lock is created. When the pending lock is triggered (by
271 someone else closing an overlapping lock range) a messaging
272 notification is sent, identified by the notify_ptr
/*
 * NOTE(review): this listing appears truncated - several declarations,
 * closing braces, a parameter line (smbpid) and the failure/cleanup
 * path are not visible, so the notes below only describe the
 * statements that are present.
 */
274 static NTSTATUS brl_tdb_lock(struct brl_context *brl,
275 struct brl_handle *brlh,
277 uint64_t start, uint64_t size,
278 enum brl_type lock_type,
283 struct lock_struct lock, *locks=NULL;
/* the tdb key is the file key blob stored in the brl handle */
286 kbuf.dptr = brlh->key.data;
287 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
289 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
290 return NT_STATUS_INTERNAL_DB_CORRUPTION;
293 /* if this is a pending lock, then with the chainlock held we
294 try to get the real lock. If we succeed then we don't need
295 to make it pending. This prevents a possible race condition
296 where the pending lock gets created after the lock that is
297 preventing the real lock gets removed */
298 if (lock_type >= PENDING_READ_LOCK) {
299 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
301 /* here we need to force that the last_lock isn't overwritten */
302 lock = brlh->last_lock;
/* recursive call with the real (non-pending) lock type */
303 status = brl_tdb_lock(brl, brlh, smbpid, start, size, rw, NULL);
304 brlh->last_lock = lock;
306 if (NT_STATUS_IS_OK(status)) {
307 tdb_chainunlock(brl->w->tdb, kbuf);
/* fetch the current (possibly empty) lock array for this file */
312 dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* fill in the record describing the requested lock */
314 lock.context.smbpid = smbpid;
315 lock.context.server = brl->server;
316 lock.context.ctx = brl;
317 lock.ntvfs = brlh->ntvfs;
/* NOTE(review): duplicate of the ctx assignment two lines above -
   harmless but redundant and can be removed */
318 lock.context.ctx = brl;
321 lock.lock_type = lock_type;
322 lock.notify_ptr = notify_ptr;
325 /* there are existing locks - make sure they don't conflict */
326 locks = (struct lock_struct *)dbuf.dptr;
327 count = dbuf.dsize / sizeof(*locks);
328 for (i=0; i<count; i++) {
329 if (brl_tdb_conflict(&locks[i], &lock)) {
/* conflict found - compute the w2k3-compatible error code */
330 status = brl_tdb_lock_failed(brlh, &lock);
336 /* no conflicts - add it to the list of locks */
337 locks = realloc_p(locks, struct lock_struct, count+1);
/* realloc_p failure path */
339 status = NT_STATUS_NO_MEMORY;
342 dbuf.dptr = (uint8_t *)locks;
345 dbuf.dsize += sizeof(lock);
/* write the extended lock array back under the chainlock */
347 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
348 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
353 tdb_chainunlock(brl->w->tdb, kbuf);
355 /* the caller needs to know if the real lock was granted. If
356 we have reached here then it must be a pending lock that
357 was granted, so tell them the lock failed */
358 if (lock_type >= PENDING_READ_LOCK) {
359 return NT_STATUS_LOCK_NOT_GRANTED;
/* failure path: release the chainlock before returning status */
367 tdb_chainunlock(brl->w->tdb, kbuf);
373 we are removing a lock that might be holding up a pending lock. Scan for pending
374 locks that cover this range and if we find any then notify the server that it should
377 static void brl_tdb_notify_unlock(struct brl_context *brl,
378 struct lock_struct *locks, int count,
379 struct lock_struct *removed_lock)
383 /* the last_notice logic is to prevent stampeding on a lock
384 range. It prevents us sending hundreds of notifies on the
385 same range of bytes. It doesn't prevent all possible
386 stampedes, but it does prevent the most common problem */
389 for (i=0;i<count;i++) {
390 if (locks[i].lock_type >= PENDING_READ_LOCK &&
391 brl_tdb_overlap(&locks[i], removed_lock)) {
392 if (last_notice != -1 && brl_tdb_overlap(&locks[i], &locks[last_notice])) {
395 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
398 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
399 MSG_BRL_RETRY, locks[i].notify_ptr);
406 send notifications for all pending locks - the file is being closed by this
409 static void brl_tdb_notify_all(struct brl_context *brl,
410 struct lock_struct *locks, int count)
413 for (i=0;i<count;i++) {
414 if (locks->lock_type >= PENDING_READ_LOCK) {
415 brl_tdb_notify_unlock(brl, locks, count, &locks[i]);
423 Unlock a range of bytes.
/*
 * NOTE(review): this listing appears truncated - declarations, the
 * smbpid parameter line, loop-body assignments (e.g. lock = &locks[i])
 * and closing braces are not visible; the notes below describe only
 * the visible statements.
 */
425 static NTSTATUS brl_tdb_unlock(struct brl_context *brl,
426 struct brl_handle *brlh,
428 uint64_t start, uint64_t size)
432 struct lock_struct *locks, *lock;
433 struct lock_context context;
/* the tdb key is the file key blob stored in the brl handle */
436 kbuf.dptr = brlh->key.data;
437 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
439 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
440 return NT_STATUS_INTERNAL_DB_CORRUPTION;
443 dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record at all means nothing was locked on this file */
445 tdb_chainunlock(brl->w->tdb, kbuf);
446 return NT_STATUS_RANGE_NOT_LOCKED;
/* build the context the unlock must match */
449 context.smbpid = smbpid;
450 context.server = brl->server;
453 /* there are existing locks - find a match */
454 locks = (struct lock_struct *)dbuf.dptr;
455 count = dbuf.dsize / sizeof(*locks);
/* first pass: prefer an exact-range WRITE_LOCK owned by this context */
457 for (i=0; i<count; i++) {
459 if (brl_tdb_same_context(&lock->context, &context) &&
460 lock->ntvfs == brlh->ntvfs &&
461 lock->start == start &&
462 lock->size == size &&
463 lock->lock_type == WRITE_LOCK) {
467 if (i < count) goto found;
/* second pass: accept any non-pending lock with the same range/owner */
469 for (i=0; i<count; i++) {
471 if (brl_tdb_same_context(&lock->context, &context) &&
472 lock->ntvfs == brlh->ntvfs &&
473 lock->start == start &&
474 lock->size == size &&
475 lock->lock_type < PENDING_READ_LOCK) {
482 /* found it - delete it */
/* presumably this branch runs when it was the only lock: drop the
   whole record - TODO confirm against the untruncated source */
484 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
485 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* otherwise remove the entry by sliding the tail of the array down */
489 struct lock_struct removed_lock = *lock;
491 memmove(&locks[i], &locks[i+1],
492 sizeof(*locks)*((count-1) - i));
496 /* send notifications for any relevant pending locks */
497 brl_tdb_notify_unlock(brl, locks, count, &removed_lock);
/* store the shrunken array back */
499 dbuf.dsize = count * sizeof(*locks);
501 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
502 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
508 tdb_chainunlock(brl->w->tdb, kbuf);
512 /* we didn't find it */
513 status = NT_STATUS_RANGE_NOT_LOCKED;
/* failure path: release the chainlock before returning status */
517 tdb_chainunlock(brl->w->tdb, kbuf);
523 remove a pending lock. This is called when the caller has either
524 given up trying to establish a lock or when they have succeeded in
525 getting it. In either case they no longer need to be notified.
/*
 * NOTE(review): this listing appears truncated - the notify_ptr
 * parameter line, some declarations and closing braces are not
 * visible; the notes below describe only the visible statements.
 */
527 static NTSTATUS brl_tdb_remove_pending(struct brl_context *brl,
528 struct brl_handle *brlh,
533 struct lock_struct *locks;
/* the tdb key is the file key blob stored in the brl handle */
536 kbuf.dptr = brlh->key.data;
537 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
539 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
540 return NT_STATUS_INTERNAL_DB_CORRUPTION;
543 dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record means there is no pending lock to remove */
545 tdb_chainunlock(brl->w->tdb, kbuf);
546 return NT_STATUS_RANGE_NOT_LOCKED;
549 /* there are existing locks - find a match */
550 locks = (struct lock_struct *)dbuf.dptr;
551 count = dbuf.dsize / sizeof(*locks);
/* a pending lock is matched by its notify_ptr plus owning server */
553 for (i=0; i<count; i++) {
554 struct lock_struct *lock = &locks[i];
556 if (lock->lock_type >= PENDING_READ_LOCK &&
557 lock->notify_ptr == notify_ptr &&
558 cluster_id_equal(&lock->context.server, &brl->server)) {
559 /* found it - delete it */
/* presumably this branch runs when it was the only entry: drop the
   whole record - TODO confirm against the untruncated source */
561 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
562 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* otherwise slide the tail of the array down over the entry */
567 memmove(&locks[i], &locks[i+1],
568 sizeof(*locks)*((count-1) - i));
/* store the shrunken array back */
571 dbuf.dsize = count * sizeof(*locks);
572 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
573 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
579 tdb_chainunlock(brl->w->tdb, kbuf);
584 /* we didn't find it */
585 status = NT_STATUS_RANGE_NOT_LOCKED;
/* failure path: release the chainlock before returning status */
589 tdb_chainunlock(brl->w->tdb, kbuf);
595 Test if we are allowed to perform IO on a region of an open file
/*
 * NOTE(review): this listing appears truncated - the smbpid parameter
 * line, declarations, the empty-record early return body, the
 * lock.start/lock.size assignments and the final return are not
 * visible; the notes below describe only the visible statements.
 */
597 static NTSTATUS brl_tdb_locktest(struct brl_context *brl,
598 struct brl_handle *brlh,
600 uint64_t start, uint64_t size,
601 enum brl_type lock_type)
605 struct lock_struct lock, *locks;
/* the tdb key is the file key blob stored in the brl handle */
607 kbuf.dptr = brlh->key.data;
608 kbuf.dsize = brlh->key.length;
/* read-only check: no chainlock is taken here */
610 dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record at all means no locks, so the IO is allowed */
611 if (dbuf.dptr == NULL) {
/* build a probe lock describing the IO request */
615 lock.context.smbpid = smbpid;
616 lock.context.server = brl->server;
617 lock.context.ctx = brl;
618 lock.ntvfs = brlh->ntvfs;
621 lock.lock_type = lock_type;
623 /* there are existing locks - make sure they don't conflict */
624 locks = (struct lock_struct *)dbuf.dptr;
625 count = dbuf.dsize / sizeof(*locks);
/* conflict_other ignores our own locks on the same handle */
627 for (i=0; i<count; i++) {
628 if (brl_tdb_conflict_other(&locks[i], &lock)) {
/* NOTE(review): dbuf.dptr should be freed before this return -
   presumably the freeing line was lost in truncation; verify */
630 return NT_STATUS_FILE_LOCK_CONFLICT;
640 Remove any locks associated with a open file.
/*
 * NOTE(review): this listing appears truncated - declarations, the
 * dcount increment, closing braces and the return are not visible;
 * the notes below describe only the visible statements.
 */
642 static NTSTATUS brl_tdb_close(struct brl_context *brl,
643 struct brl_handle *brlh)
/* dcount counts entries removed for this handle (increment not visible) */
646 int count, i, dcount=0;
647 struct lock_struct *locks;
/* the tdb key is the file key blob stored in the brl handle */
650 kbuf.dptr = brlh->key.data;
651 kbuf.dsize = brlh->key.length;
/* serialise all access to this file's lock record */
653 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
654 return NT_STATUS_INTERNAL_DB_CORRUPTION;
657 dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record means there is nothing to clean up */
659 tdb_chainunlock(brl->w->tdb, kbuf);
663 /* there are existing locks - remove any for this fnum */
664 locks = (struct lock_struct *)dbuf.dptr;
665 count = dbuf.dsize / sizeof(*locks);
/* every lock owned by this context/server on this handle is removed */
667 for (i=0; i<count; i++) {
668 struct lock_struct *lock = &locks[i];
670 if (lock->context.ctx == brl &&
671 cluster_id_equal(&lock->context.server, &brl->server) &&
672 lock->ntvfs == brlh->ntvfs) {
673 /* found it - delete it */
/* compact the array by sliding the tail down over the entry */
674 if (count > 1 && i < count-1) {
675 memmove(&locks[i], &locks[i+1],
676 sizeof(*locks)*((count-1) - i));
684 status = NT_STATUS_OK;
/* presumably this branch runs when the array became empty: drop the
   whole record - TODO confirm against the untruncated source */
687 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
688 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
690 } else if (dcount != 0) {
691 /* tell all pending lock holders for this file that
692 they have a chance now. This is a bit indiscriminate,
   but works */
694 brl_tdb_notify_all(brl, locks, count);
/* store the shrunken array back */
696 dbuf.dsize = count * sizeof(*locks);
698 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
699 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* release the chainlock on all paths before returning status */
704 tdb_chainunlock(brl->w->tdb, kbuf);
710 static const struct brlock_ops brlock_tdb_ops = {
711 .brl_init = brl_tdb_init,
712 .brl_create_handle = brl_tdb_create_handle,
713 .brl_lock = brl_tdb_lock,
714 .brl_unlock = brl_tdb_unlock,
715 .brl_remove_pending = brl_tdb_remove_pending,
716 .brl_locktest = brl_tdb_locktest,
717 .brl_close = brl_tdb_close
721 void brl_tdb_init_ops(void)
723 brl_set_ops(&brlock_tdb_ops);