/*
   Unix SMB/CIFS implementation.

   generic byte range locking code - tdb backend

   Copyright (C) Andrew Tridgell 1992-2006
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
28 #include "system/filesys.h"
29 #include "lib/tdb/include/tdb.h"
30 #include "messaging/messaging.h"
31 #include "lib/dbwrap/dbwrap.h"
32 #include "lib/messaging/irpc.h"
33 #include "libcli/libcli.h"
34 #include "cluster/cluster.h"
35 #include "ntvfs/common/brlock.h"
36 #include "ntvfs/ntvfs.h"
37 #include "param/param.h"
/*
  in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
  a file. For a local posix filesystem this will usually be a combination
  of the device and inode numbers of the file, but it can be anything
  that uniquely identifies a file for locking purposes, as long
  as it is applied consistently.
*/
47 /* this struct is typicaly attached to tcon */
49 struct db_context *db;
50 struct server_id server;
51 struct messaging_context *messaging_ctx;
55 the lock context contains the elements that define whether one
56 lock is the same as another lock
59 struct server_id server;
61 struct brl_context *ctx;
64 /* The data in brlock records is an unsorted linear array of these
65 records. It is unnecessary to store the count as tdb provides the
68 struct lock_context context;
69 struct ntvfs_handle *ntvfs;
72 enum brl_type lock_type;
76 /* this struct is attached to on oprn file handle */
79 struct ntvfs_handle *ntvfs;
80 struct lock_struct last_lock;
84 Open up the brlock.tdb database. Close it down using
85 talloc_free(). We need the messaging_ctx to allow for
86 pending lock notifications.
88 static struct brl_context *brl_tdb_init(TALLOC_CTX *mem_ctx, struct server_id server,
89 struct messaging_context *messaging_ctx)
91 struct brl_context *brl;
93 brl = talloc(mem_ctx, struct brl_context);
98 brl->db = db_tmp_open(brl, global_loadparm, "brlock.tdb", TDB_DEFAULT);
99 if (brl->db == NULL) {
104 brl->server = server;
105 brl->messaging_ctx = messaging_ctx;
110 static struct brl_handle *brl_tdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
113 struct brl_handle *brlh;
115 brlh = talloc(mem_ctx, struct brl_handle);
120 brlh->key = *file_key;
122 ZERO_STRUCT(brlh->last_lock);
128 see if two locking contexts are equal
130 static bool brl_tdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
132 return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
133 ctx1->smbpid == ctx2->smbpid &&
134 ctx1->ctx == ctx2->ctx);
138 see if lck1 and lck2 overlap
140 lck1 is the existing lock. lck2 is the new lock we are
143 static bool brl_tdb_overlap(struct lock_struct *lck1,
144 struct lock_struct *lck2)
146 /* this extra check is not redundent - it copes with locks
147 that go beyond the end of 64 bit file space */
148 if (lck1->size != 0 &&
149 lck1->start == lck2->start &&
150 lck1->size == lck2->size) {
154 if (lck1->start >= (lck2->start+lck2->size) ||
155 lck2->start >= (lck1->start+lck1->size)) {
159 /* we have a conflict. Now check to see if lck1 really still
160 * exists, which involves checking if the process still
161 * exists. We leave this test to last as its the most
162 * expensive test, especially when we are clustered */
163 /* TODO: need to do this via a server_id_exists() call, which
164 * hasn't been written yet. When clustered this will need to
171 See if lock2 can be added when lock1 is in place.
173 static bool brl_tdb_conflict(struct lock_struct *lck1,
174 struct lock_struct *lck2)
176 /* pending locks don't conflict with anything */
177 if (lck1->lock_type >= PENDING_READ_LOCK ||
178 lck2->lock_type >= PENDING_READ_LOCK) {
182 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
186 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
187 lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
191 return brl_tdb_overlap(lck1, lck2);
196 Check to see if this lock conflicts, but ignore our own locks on the
199 static bool brl_tdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
201 /* pending locks don't conflict with anything */
202 if (lck1->lock_type >= PENDING_READ_LOCK ||
203 lck2->lock_type >= PENDING_READ_LOCK) {
207 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
211 * note that incoming write calls conflict with existing READ
212 * locks even if the context is the same. JRA. See LOCKTEST7
215 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
216 lck1->ntvfs == lck2->ntvfs &&
217 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
221 return brl_tdb_overlap(lck1, lck2);
226 amazingly enough, w2k3 "remembers" whether the last lock failure
227 is the same as this one and changes its error code. I wonder if any
230 static NTSTATUS brl_tdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
233 * this function is only called for non pending lock!
236 /* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
237 if (lock->ntvfs->ctx->protocol == PROTOCOL_SMB2) {
238 return NT_STATUS_LOCK_NOT_GRANTED;
242 * if the notify_ptr is non NULL,
243 * it means that we're at the end of a pending lock
244 * and the real lock is requested after the timout went by
245 * In this case we need to remember the last_lock and always
246 * give FILE_LOCK_CONFLICT
248 if (lock->notify_ptr) {
249 brlh->last_lock = *lock;
250 return NT_STATUS_FILE_LOCK_CONFLICT;
254 * amazing the little things you learn with a test
255 * suite. Locks beyond this offset (as a 64 bit
256 * number!) always generate the conflict error code,
257 * unless the top bit is set
259 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
260 brlh->last_lock = *lock;
261 return NT_STATUS_FILE_LOCK_CONFLICT;
265 * if the current lock matches the last failed lock on the file handle
266 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
268 if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
269 lock->context.ctx == brlh->last_lock.context.ctx &&
270 lock->ntvfs == brlh->last_lock.ntvfs &&
271 lock->start == brlh->last_lock.start) {
272 return NT_STATUS_FILE_LOCK_CONFLICT;
275 brlh->last_lock = *lock;
276 return NT_STATUS_LOCK_NOT_GRANTED;
280 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
281 which case a real lock is first tried, and if that fails then a
282 pending lock is created. When the pending lock is triggered (by
283 someone else closing an overlapping lock range) a messaging
284 notification is sent, identified by the notify_ptr
286 static NTSTATUS brl_tdb_lock(struct brl_context *brl,
287 struct brl_handle *brlh,
289 uint64_t start, uint64_t size,
290 enum brl_type lock_type,
295 struct lock_struct lock, *locks=NULL;
297 struct db_record *rec = NULL;
299 /* if this is a pending lock, then with the chainlock held we
300 try to get the real lock. If we succeed then we don't need
301 to make it pending. This prevents a possible race condition
302 where the pending lock gets created after the lock that is
303 preventing the real lock gets removed */
304 if (lock_type >= PENDING_READ_LOCK) {
305 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
307 /* here we need to force that the last_lock isn't overwritten */
308 lock = brlh->last_lock;
309 status = brl_tdb_lock(brl, brlh, smbpid, start, size, rw, NULL);
310 brlh->last_lock = lock;
312 if (NT_STATUS_IS_OK(status)) {
317 kbuf.dptr = brlh->key.data;
318 kbuf.dsize = brlh->key.length;
320 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
322 return NT_STATUS_INTERNAL_DB_CORRUPTION;
328 lock.context.smbpid = smbpid;
329 lock.context.server = brl->server;
330 lock.context.ctx = brl;
331 lock.ntvfs = brlh->ntvfs;
332 lock.context.ctx = brl;
335 lock.lock_type = lock_type;
336 lock.notify_ptr = notify_ptr;
339 /* there are existing locks - make sure they don't conflict */
340 locks = (struct lock_struct *)dbuf.dptr;
341 count = dbuf.dsize / sizeof(*locks);
342 for (i=0; i<count; i++) {
343 if (brl_tdb_conflict(&locks[i], &lock)) {
344 status = brl_tdb_lock_failed(brlh, &lock);
350 /* no conflicts - add it to the list of locks */
351 locks = talloc_realloc(rec, locks, struct lock_struct, count+1);
353 status = NT_STATUS_NO_MEMORY;
356 dbuf.dptr = (uint8_t *)locks;
359 dbuf.dsize += sizeof(lock);
361 status = rec->store(rec, dbuf, TDB_REPLACE);
362 if (!NT_STATUS_IS_OK(status)) {
368 /* the caller needs to know if the real lock was granted. If
369 we have reached here then it must be a pending lock that
370 was granted, so tell them the lock failed */
371 if (lock_type >= PENDING_READ_LOCK) {
372 return NT_STATUS_LOCK_NOT_GRANTED;
384 we are removing a lock that might be holding up a pending lock. Scan for pending
385 locks that cover this range and if we find any then notify the server that it should
388 static void brl_tdb_notify_unlock(struct brl_context *brl,
389 struct lock_struct *locks, int count,
390 struct lock_struct *removed_lock)
394 /* the last_notice logic is to prevent stampeding on a lock
395 range. It prevents us sending hundreds of notifies on the
396 same range of bytes. It doesn't prevent all possible
397 stampedes, but it does prevent the most common problem */
400 for (i=0;i<count;i++) {
401 if (locks[i].lock_type >= PENDING_READ_LOCK &&
402 brl_tdb_overlap(&locks[i], removed_lock)) {
403 if (last_notice != -1 && brl_tdb_overlap(&locks[i], &locks[last_notice])) {
406 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
409 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
410 MSG_BRL_RETRY, locks[i].notify_ptr);
417 send notifications for all pending locks - the file is being closed by this
420 static void brl_tdb_notify_all(struct brl_context *brl,
421 struct lock_struct *locks, int count)
424 for (i=0;i<count;i++) {
425 if (locks->lock_type >= PENDING_READ_LOCK) {
426 brl_tdb_notify_unlock(brl, locks, count, &locks[i]);
434 Unlock a range of bytes.
436 static NTSTATUS brl_tdb_unlock(struct brl_context *brl,
437 struct brl_handle *brlh,
439 uint64_t start, uint64_t size)
443 struct lock_struct *locks, *lock;
444 struct lock_context context;
446 struct db_record *rec = NULL;
448 kbuf.dptr = brlh->key.data;
449 kbuf.dsize = brlh->key.length;
451 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
453 return NT_STATUS_INTERNAL_DB_CORRUPTION;
456 if (!rec->value.dptr) {
458 return NT_STATUS_RANGE_NOT_LOCKED;
463 context.smbpid = smbpid;
464 context.server = brl->server;
467 /* there are existing locks - find a match */
468 locks = (struct lock_struct *)dbuf.dptr;
469 count = dbuf.dsize / sizeof(*locks);
471 for (i=0; i<count; i++) {
473 if (brl_tdb_same_context(&lock->context, &context) &&
474 lock->ntvfs == brlh->ntvfs &&
475 lock->start == start &&
476 lock->size == size &&
477 lock->lock_type == WRITE_LOCK) {
481 if (i < count) goto found;
483 for (i=0; i<count; i++) {
485 if (brl_tdb_same_context(&lock->context, &context) &&
486 lock->ntvfs == brlh->ntvfs &&
487 lock->start == start &&
488 lock->size == size &&
489 lock->lock_type < PENDING_READ_LOCK) {
496 status = NT_STATUS_RANGE_NOT_LOCKED;
497 } else if (count == 1) {
498 status = rec->delete_rec(rec);
500 struct lock_struct removed_lock = *lock;
502 memmove(&locks[i], &locks[i+1],
503 sizeof(*locks)*((count-1) - i));
507 /* send notifications for any relevant pending locks */
508 brl_tdb_notify_unlock(brl, locks, count, &removed_lock);
510 dbuf.dsize = count * sizeof(*locks);
512 status = rec->store(rec, dbuf, TDB_REPLACE);
521 remove a pending lock. This is called when the caller has either
522 given up trying to establish a lock or when they have succeeded in
523 getting it. In either case they no longer need to be notified.
525 static NTSTATUS brl_tdb_remove_pending(struct brl_context *brl,
526 struct brl_handle *brlh,
531 struct lock_struct *locks;
533 struct db_record *rec = NULL;
535 kbuf.dptr = brlh->key.data;
536 kbuf.dsize = brlh->key.length;
538 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
540 return NT_STATUS_INTERNAL_DB_CORRUPTION;
545 /* there are existing locks - find a match */
546 locks = (struct lock_struct *)dbuf.dptr;
547 count = dbuf.dsize / sizeof(*locks);
549 status = NT_STATUS_RANGE_NOT_LOCKED;
551 for (i=0; i<count; i++) {
552 struct lock_struct *lock = &locks[i];
554 if (lock->lock_type >= PENDING_READ_LOCK &&
555 lock->notify_ptr == notify_ptr &&
556 cluster_id_equal(&lock->context.server, &brl->server)) {
557 /* found it - delete it */
559 status = rec->delete_rec(rec);
562 memmove(&locks[i], &locks[i+1],
563 sizeof(*locks)*((count-1) - i));
566 dbuf.dsize = count * sizeof(*locks);
567 status = rec->store(rec, dbuf, TDB_REPLACE);
579 Test if we are allowed to perform IO on a region of an open file
581 static NTSTATUS brl_tdb_locktest(struct brl_context *brl,
582 struct brl_handle *brlh,
584 uint64_t start, uint64_t size,
585 enum brl_type lock_type)
589 struct lock_struct lock, *locks;
592 kbuf.dptr = brlh->key.data;
593 kbuf.dsize = brlh->key.length;
595 if (brl->db->fetch(brl->db, brl, kbuf, &dbuf) != 0) {
599 lock.context.smbpid = smbpid;
600 lock.context.server = brl->server;
601 lock.context.ctx = brl;
602 lock.ntvfs = brlh->ntvfs;
605 lock.lock_type = lock_type;
607 /* there are existing locks - make sure they don't conflict */
608 locks = (struct lock_struct *)dbuf.dptr;
609 count = dbuf.dsize / sizeof(*locks);
611 status = NT_STATUS_OK;
613 for (i=0; i<count; i++) {
614 if (brl_tdb_conflict_other(&locks[i], &lock)) {
615 status = NT_STATUS_FILE_LOCK_CONFLICT;
620 talloc_free(dbuf.dptr);
626 Remove any locks associated with a open file.
628 static NTSTATUS brl_tdb_close(struct brl_context *brl,
629 struct brl_handle *brlh)
632 int count, i, dcount=0;
633 struct lock_struct *locks;
635 struct db_record *rec = NULL;
637 kbuf.dptr = brlh->key.data;
638 kbuf.dsize = brlh->key.length;
640 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
642 return NT_STATUS_INTERNAL_DB_CORRUPTION;
651 /* there are existing locks - remove any for this fnum */
652 locks = (struct lock_struct *)dbuf.dptr;
653 count = dbuf.dsize / sizeof(*locks);
655 for (i=0; i<count; i++) {
656 struct lock_struct *lock = &locks[i];
658 if (lock->context.ctx == brl &&
659 cluster_id_equal(&lock->context.server, &brl->server) &&
660 lock->ntvfs == brlh->ntvfs) {
661 /* found it - delete it */
662 if (count > 1 && i < count-1) {
663 memmove(&locks[i], &locks[i+1],
664 sizeof(*locks)*((count-1) - i));
672 status = NT_STATUS_OK;
675 status = rec->delete_rec(rec);
676 } else if (dcount != 0) {
677 /* tell all pending lock holders for this file that
678 they have a chance now. This is a bit indiscriminant,
680 brl_tdb_notify_all(brl, locks, count);
682 dbuf.dsize = count * sizeof(*locks);
684 status = rec->store(rec, dbuf, TDB_REPLACE);
693 static const struct brlock_ops brlock_tdb_ops = {
694 .brl_init = brl_tdb_init,
695 .brl_create_handle = brl_tdb_create_handle,
696 .brl_lock = brl_tdb_lock,
697 .brl_unlock = brl_tdb_unlock,
698 .brl_remove_pending = brl_tdb_remove_pending,
699 .brl_locktest = brl_tdb_locktest,
700 .brl_close = brl_tdb_close
704 void brl_tdb_init_ops(void)
706 brl_set_ops(&brlock_tdb_ops);