2 Unix SMB/CIFS implementation.
4 generic byte range locking code - tdb backend
6 Copyright (C) Andrew Tridgell 1992-2006
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 /* This module implements a tdb based byte range locking service,
24 replacing the fcntl() based byte range locking previously
25 used. This allows us to provide the same semantics as NT */
28 #include "system/filesys.h"
29 #include "lib/tdb/include/tdb.h"
30 #include "messaging/messaging.h"
31 #include "lib/dbwrap/dbwrap.h"
32 #include "lib/messaging/irpc.h"
33 #include "libcli/libcli.h"
34 #include "cluster/cluster.h"
35 #include "ntvfs/common/brlock.h"
36 #include "ntvfs/ntvfs.h"
39 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
40 a file. For a local posix filesystem this will usually be a combination
41 of the device and inode numbers of the file, but it can be anything
42 that uniquely identifies a file for locking purposes, as long
43 as it is applied consistently.
46 /* this struct is typically attached to tcon */
/* NOTE(review): the "struct brl_context {" opener and closing "};" appear to
 * have been lost in extraction; the fields below are kept byte-identical. */
/* db: handle on brlock.tdb, opened in brl_tdb_init() via db_tmp_open() */
48 struct db_context *db;
/* server: this server's cluster id; copied into each lock's context */
49 struct server_id server;
/* messaging_ctx: used to send MSG_BRL_RETRY notifications for pending locks */
50 struct messaging_context *messaging_ctx;
54 the lock context contains the elements that define whether one
55 lock is the same as another lock
/* NOTE(review): struct braces and (judging by brl_tdb_same_context, which
 * compares server, smbpid and ctx) an "uint32_t smbpid;" field were lost in
 * extraction; remaining fields kept byte-identical. */
58 struct server_id server;
/* ctx: the owning brl_context - distinguishes locks from different contexts */
60 struct brl_context *ctx;
63 /* The data in brlock records is an unsorted linear array of these
64 records. It is unnecessary to store the count as tdb provides the
/* NOTE(review): extraction dropped the end of this comment, the struct
 * braces, and - judging by uses of lock.start, lock.size and
 * lock.notify_ptr elsewhere in this file - the start/size/notify_ptr
 * fields. Remaining fields kept byte-identical. */
67 struct lock_context context;
/* ntvfs: the file handle that owns this lock */
68 struct ntvfs_handle *ntvfs;
/* lock_type: READ_LOCK/WRITE_LOCK or a PENDING_*_LOCK variant */
71 enum brl_type lock_type;
75 /* this struct is attached to an open file handle */
/* NOTE(review): struct braces and - judging by uses of brlh->key elsewhere -
 * a "DATA_BLOB key;" field were lost in extraction. */
78 struct ntvfs_handle *ntvfs;
/* last_lock: remembered failed lock, used by brl_tdb_lock_failed() to decide
 * between LOCK_NOT_GRANTED and FILE_LOCK_CONFLICT */
79 struct lock_struct last_lock;
83 Open up the brlock.tdb database. Close it down using
84 talloc_free(). We need the messaging_ctx to allow for
85 pending lock notifications.
/* NOTE(review): extraction dropped lines here (opening brace, NULL checks,
 * talloc_free on failure and the final "return brl;"); code kept
 * byte-identical. Returns a talloc'd brl_context, or presumably NULL on
 * allocation/open failure - confirm against the original file. */
87 static struct brl_context *brl_tdb_init(TALLOC_CTX *mem_ctx, struct server_id server,
88 struct messaging_context *messaging_ctx)
90 struct brl_context *brl;
92 brl = talloc(mem_ctx, struct brl_context);
/* open the shared brlock database; lifetime is tied to brl */
97 brl->db = db_tmp_open(brl, "brlock.tdb", TDB_DEFAULT);
98 if (brl->db == NULL) {
103 brl->server = server;
104 brl->messaging_ctx = messaging_ctx;
/* create a brl_handle for one open file; the handle is talloc'd off mem_ctx.
 * NOTE(review): extraction dropped lines (the file_key parameter in the
 * signature, NULL checks and the final return); code kept byte-identical. */
109 static struct brl_handle *brl_tdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
112 struct brl_handle *brlh;
114 brlh = talloc(mem_ctx, struct brl_handle);
/* shallow copy of the caller's key blob - presumably steal/copy semantics
 * were handled in the dropped lines; confirm against the original file */
119 brlh->key = *file_key;
/* no failed lock remembered yet */
121 ZERO_STRUCT(brlh->last_lock);
127 see if two locking contexts are equal
129 static bool brl_tdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
131 return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
132 ctx1->smbpid == ctx2->smbpid &&
133 ctx1->ctx == ctx2->ctx);
137 see if lck1 and lck2 overlap
139 lck1 is the existing lock. lck2 is the new lock we are
142 static bool brl_tdb_overlap(struct lock_struct *lck1,
143 struct lock_struct *lck2)
145 /* this extra check is not redundent - it copes with locks
146 that go beyond the end of 64 bit file space */
147 if (lck1->size != 0 &&
148 lck1->start == lck2->start &&
149 lck1->size == lck2->size) {
153 if (lck1->start >= (lck2->start+lck2->size) ||
154 lck2->start >= (lck1->start+lck1->size)) {
158 /* we have a conflict. Now check to see if lck1 really still
159 * exists, which involves checking if the process still
160 * exists. We leave this test to last as its the most
161 * expensive test, especially when we are clustered */
162 /* TODO: need to do this via a server_id_exists() call, which
163 * hasn't been written yet. When clustered this will need to
170 See if lock2 can be added when lock1 is in place.
172 static bool brl_tdb_conflict(struct lock_struct *lck1,
173 struct lock_struct *lck2)
175 /* pending locks don't conflict with anything */
176 if (lck1->lock_type >= PENDING_READ_LOCK ||
177 lck2->lock_type >= PENDING_READ_LOCK) {
181 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
185 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
186 lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
190 return brl_tdb_overlap(lck1, lck2);
195 Check to see if this lock conflicts, but ignore our own locks on the
198 static bool brl_tdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
200 /* pending locks don't conflict with anything */
201 if (lck1->lock_type >= PENDING_READ_LOCK ||
202 lck2->lock_type >= PENDING_READ_LOCK) {
206 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
210 * note that incoming write calls conflict with existing READ
211 * locks even if the context is the same. JRA. See LOCKTEST7
214 if (brl_tdb_same_context(&lck1->context, &lck2->context) &&
215 lck1->ntvfs == lck2->ntvfs &&
216 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
220 return brl_tdb_overlap(lck1, lck2);
225 amazingly enough, w2k3 "remembers" whether the last lock failure
226 is the same as this one and changes its error code. I wonder if any
229 static NTSTATUS brl_tdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
232 * this function is only called for non pending lock!
235 /* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
236 if (lock->ntvfs->ctx->protocol == PROTOCOL_SMB2) {
237 return NT_STATUS_LOCK_NOT_GRANTED;
241 * if the notify_ptr is non NULL,
242 * it means that we're at the end of a pending lock
243 * and the real lock is requested after the timout went by
244 * In this case we need to remember the last_lock and always
245 * give FILE_LOCK_CONFLICT
247 if (lock->notify_ptr) {
248 brlh->last_lock = *lock;
249 return NT_STATUS_FILE_LOCK_CONFLICT;
253 * amazing the little things you learn with a test
254 * suite. Locks beyond this offset (as a 64 bit
255 * number!) always generate the conflict error code,
256 * unless the top bit is set
258 if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
259 brlh->last_lock = *lock;
260 return NT_STATUS_FILE_LOCK_CONFLICT;
264 * if the current lock matches the last failed lock on the file handle
265 * and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
267 if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
268 lock->context.ctx == brlh->last_lock.context.ctx &&
269 lock->ntvfs == brlh->last_lock.ntvfs &&
270 lock->start == brlh->last_lock.start) {
271 return NT_STATUS_FILE_LOCK_CONFLICT;
274 brlh->last_lock = *lock;
275 return NT_STATUS_LOCK_NOT_GRANTED;
279 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
280 which case a real lock is first tried, and if that fails then a
281 pending lock is created. When the pending lock is triggered (by
282 someone else closing an overlapping lock range) a messaging
283 notification is sent, identified by the notify_ptr
/* NOTE(review): extraction dropped lines throughout this function (the
 * smbpid/notify_ptr parameters, kbuf/dbuf declarations, error-path braces,
 * the "fail:" cleanup label and talloc_free(rec) calls); the surviving code
 * is kept byte-identical below. */
285 static NTSTATUS brl_tdb_lock(struct brl_context *brl,
286 struct brl_handle *brlh,
288 uint64_t start, uint64_t size,
289 enum brl_type lock_type,
294 struct lock_struct lock, *locks=NULL;
296 struct db_record *rec = NULL;
298 /* if this is a pending lock, then with the chainlock held we
299 try to get the real lock. If we succeed then we don't need
300 to make it pending. This prevents a possible race condition
301 where the pending lock gets created after the lock that is
302 preventing the real lock gets removed */
303 if (lock_type >= PENDING_READ_LOCK) {
304 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
306 /* here we need to force that the last_lock isn't overwritten */
307 lock = brlh->last_lock;
/* recursive call with the real (non-pending) lock type */
308 status = brl_tdb_lock(brl, brlh, smbpid, start, size, rw, NULL);
309 brlh->last_lock = lock;
311 if (NT_STATUS_IS_OK(status)) {
/* the file key identifies the record in brlock.tdb */
316 kbuf.dptr = brlh->key.data;
317 kbuf.dsize = brlh->key.length;
/* fetch_locked takes the chainlock on this record until rec is freed */
319 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
321 return NT_STATUS_INTERNAL_DB_CORRUPTION;
327 lock.context.smbpid = smbpid;
328 lock.context.server = brl->server;
329 lock.context.ctx = brl;
330 lock.ntvfs = brlh->ntvfs;
/* NOTE(review): duplicate of the assignment above - harmless but redundant */
331 lock.context.ctx = brl;
334 lock.lock_type = lock_type;
335 lock.notify_ptr = notify_ptr;
338 /* there are existing locks - make sure they don't conflict */
339 locks = (struct lock_struct *)dbuf.dptr;
340 count = dbuf.dsize / sizeof(*locks);
341 for (i=0; i<count; i++) {
342 if (brl_tdb_conflict(&locks[i], &lock)) {
/* map the conflict to the right NT status (see brl_tdb_lock_failed) */
343 status = brl_tdb_lock_failed(brlh, &lock);
349 /* no conflicts - add it to the list of locks */
350 locks = talloc_realloc(rec, locks, struct lock_struct, count+1);
352 status = NT_STATUS_NO_MEMORY;
355 dbuf.dptr = (uint8_t *)locks;
358 dbuf.dsize += sizeof(lock);
/* write the extended lock array back to the record */
360 status = rec->store(rec, dbuf, TDB_REPLACE);
361 if (!NT_STATUS_IS_OK(status)) {
367 /* the caller needs to know if the real lock was granted. If
368 we have reached here then it must be a pending lock that
369 was granted, so tell them the lock failed */
370 if (lock_type >= PENDING_READ_LOCK) {
371 return NT_STATUS_LOCK_NOT_GRANTED;
383 we are removing a lock that might be holding up a pending lock. Scan for pending
384 locks that cover this range and if we find any then notify the server that it should
387 static void brl_tdb_notify_unlock(struct brl_context *brl,
388 struct lock_struct *locks, int count,
389 struct lock_struct *removed_lock)
393 /* the last_notice logic is to prevent stampeding on a lock
394 range. It prevents us sending hundreds of notifies on the
395 same range of bytes. It doesn't prevent all possible
396 stampedes, but it does prevent the most common problem */
399 for (i=0;i<count;i++) {
400 if (locks[i].lock_type >= PENDING_READ_LOCK &&
401 brl_tdb_overlap(&locks[i], removed_lock)) {
402 if (last_notice != -1 && brl_tdb_overlap(&locks[i], &locks[last_notice])) {
405 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
408 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
409 MSG_BRL_RETRY, locks[i].notify_ptr);
416 send notifications for all pending locks - the file is being closed by this
419 static void brl_tdb_notify_all(struct brl_context *brl,
420 struct lock_struct *locks, int count)
423 for (i=0;i<count;i++) {
424 if (locks->lock_type >= PENDING_READ_LOCK) {
425 brl_tdb_notify_unlock(brl, locks, count, &locks[i]);
433 Unlock a range of bytes.
/* NOTE(review): extraction dropped lines in this function (the smbpid
 * parameter, kbuf/dbuf declarations, "found:" label, loop braces and the
 * cleanup paths); surviving code kept byte-identical. */
435 static NTSTATUS brl_tdb_unlock(struct brl_context *brl,
436 struct brl_handle *brlh,
438 uint64_t start, uint64_t size)
442 struct lock_struct *locks, *lock;
443 struct lock_context context;
445 struct db_record *rec = NULL;
447 kbuf.dptr = brlh->key.data;
448 kbuf.dsize = brlh->key.length;
/* take the chainlock on this file's record */
450 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
452 return NT_STATUS_INTERNAL_DB_CORRUPTION;
/* no record means no locks at all on this file */
455 if (!rec->value.dptr) {
457 return NT_STATUS_RANGE_NOT_LOCKED;
/* build the context to match against: only locks from this smbpid/server
 * (and presumably this brl context - the assignment line was dropped) */
462 context.smbpid = smbpid;
463 context.server = brl->server;
466 /* there are existing locks - find a match */
467 locks = (struct lock_struct *)dbuf.dptr;
468 count = dbuf.dsize / sizeof(*locks);
/* first pass: prefer removing an exact WRITE_LOCK match */
470 for (i=0; i<count; i++) {
472 if (brl_tdb_same_context(&lock->context, &context) &&
473 lock->ntvfs == brlh->ntvfs &&
474 lock->start == start &&
475 lock->size == size &&
476 lock->lock_type == WRITE_LOCK) {
480 if (i < count) goto found;
/* second pass: accept any non-pending lock with the same range */
482 for (i=0; i<count; i++) {
484 if (brl_tdb_same_context(&lock->context, &context) &&
485 lock->ntvfs == brlh->ntvfs &&
486 lock->start == start &&
487 lock->size == size &&
488 lock->lock_type < PENDING_READ_LOCK) {
495 status = NT_STATUS_RANGE_NOT_LOCKED;
496 } else if (count == 1) {
/* last lock on the file - remove the whole record */
497 status = rec->delete_rec(rec);
499 struct lock_struct removed_lock = *lock;
/* remove entry i by shifting the tail of the array down */
501 memmove(&locks[i], &locks[i+1],
502 sizeof(*locks)*((count-1) - i));
506 /* send notifications for any relevant pending locks */
507 brl_tdb_notify_unlock(brl, locks, count, &removed_lock);
509 dbuf.dsize = count * sizeof(*locks);
511 status = rec->store(rec, dbuf, TDB_REPLACE);
520 remove a pending lock. This is called when the caller has either
521 given up trying to establish a lock or when they have succeeded in
522 getting it. In either case they no longer need to be notified.
/* NOTE(review): extraction dropped lines (the notify_ptr parameter,
 * kbuf/dbuf declarations, empty-record check, braces and cleanup paths);
 * surviving code kept byte-identical. */
524 static NTSTATUS brl_tdb_remove_pending(struct brl_context *brl,
525 struct brl_handle *brlh,
530 struct lock_struct *locks;
532 struct db_record *rec = NULL;
534 kbuf.dptr = brlh->key.data;
535 kbuf.dsize = brlh->key.length;
/* take the chainlock on this file's record */
537 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
539 return NT_STATUS_INTERNAL_DB_CORRUPTION;
544 /* there are existing locks - find a match */
545 locks = (struct lock_struct *)dbuf.dptr;
546 count = dbuf.dsize / sizeof(*locks);
/* default result if no matching pending lock is found */
548 status = NT_STATUS_RANGE_NOT_LOCKED;
550 for (i=0; i<count; i++) {
551 struct lock_struct *lock = &locks[i];
/* match on pending type, the caller's notify_ptr, and our server id */
553 if (lock->lock_type >= PENDING_READ_LOCK &&
554 lock->notify_ptr == notify_ptr &&
555 cluster_id_equal(&lock->context.server, &brl->server)) {
556 /* found it - delete it */
/* (this branch presumably handles count==1: drop the whole record) */
558 status = rec->delete_rec(rec);
/* otherwise remove entry i by shifting the tail down and re-store */
561 memmove(&locks[i], &locks[i+1],
562 sizeof(*locks)*((count-1) - i));
565 dbuf.dsize = count * sizeof(*locks);
566 status = rec->store(rec, dbuf, TDB_REPLACE);
578 Test if we are allowed to perform IO on a region of an open file
/* NOTE(review): extraction dropped lines (the smbpid parameter, kbuf/dbuf
 * declarations, the empty-fetch early return, and lock.start/size
 * assignments); surviving code kept byte-identical. This is a read-only
 * check - it never modifies the database. */
580 static NTSTATUS brl_tdb_locktest(struct brl_context *brl,
581 struct brl_handle *brlh,
583 uint64_t start, uint64_t size,
584 enum brl_type lock_type)
588 struct lock_struct lock, *locks;
591 kbuf.dptr = brlh->key.data;
592 kbuf.dsize = brlh->key.length;
/* plain fetch (no chainlock needed for a test) */
594 if (brl->db->fetch(brl->db, brl, kbuf, &dbuf) != 0) {
/* build the hypothetical lock we want to test */
598 lock.context.smbpid = smbpid;
599 lock.context.server = brl->server;
600 lock.context.ctx = brl;
601 lock.ntvfs = brlh->ntvfs;
604 lock.lock_type = lock_type;
606 /* there are existing locks - make sure they don't conflict */
607 locks = (struct lock_struct *)dbuf.dptr;
608 count = dbuf.dsize / sizeof(*locks);
610 status = NT_STATUS_OK;
/* conflict_other ignores our own locks on the same handle */
612 for (i=0; i<count; i++) {
613 if (brl_tdb_conflict_other(&locks[i], &lock)) {
614 status = NT_STATUS_FILE_LOCK_CONFLICT;
/* fetch() returned a talloc'd copy - free it before returning */
619 talloc_free(dbuf.dptr);
625 Remove any locks associated with a open file.
/* NOTE(review): extraction dropped lines (kbuf/dbuf declarations, the
 * empty-record early return, the dcount increment, loop/branch braces and
 * cleanup paths); surviving code kept byte-identical. */
627 static NTSTATUS brl_tdb_close(struct brl_context *brl,
628 struct brl_handle *brlh)
/* dcount counts how many locks we delete for this handle */
631 int count, i, dcount=0;
632 struct lock_struct *locks;
634 struct db_record *rec = NULL;
636 kbuf.dptr = brlh->key.data;
637 kbuf.dsize = brlh->key.length;
/* take the chainlock on this file's record */
639 rec = brl->db->fetch_locked(brl->db, brl, kbuf);
641 return NT_STATUS_INTERNAL_DB_CORRUPTION;
650 /* there are existing locks - remove any for this fnum */
651 locks = (struct lock_struct *)dbuf.dptr;
652 count = dbuf.dsize / sizeof(*locks);
654 for (i=0; i<count; i++) {
655 struct lock_struct *lock = &locks[i];
/* only remove locks created by this context/server/handle */
657 if (lock->context.ctx == brl &&
658 cluster_id_equal(&lock->context.server, &brl->server) &&
659 lock->ntvfs == brlh->ntvfs) {
660 /* found it - delete it */
661 if (count > 1 && i < count-1) {
/* shift the tail down over the deleted entry */
662 memmove(&locks[i], &locks[i+1],
663 sizeof(*locks)*((count-1) - i));
671 status = NT_STATUS_OK;
/* (presumably: count reached 0 - remove the whole record) */
674 status = rec->delete_rec(rec);
675 } else if (dcount != 0) {
676 /* tell all pending lock holders for this file that
677 they have a chance now. This is a bit indiscriminate,
679 brl_tdb_notify_all(brl, locks, count);
681 dbuf.dsize = count * sizeof(*locks);
683 status = rec->store(rec, dbuf, TDB_REPLACE);
/* dispatch table wiring this tdb backend into the generic brlock interface
 * (registered via brl_tdb_init_ops below). NOTE(review): the closing "};"
 * appears to have been lost in extraction. */
692 static const struct brlock_ops brlock_tdb_ops = {
693 .brl_init = brl_tdb_init,
694 .brl_create_handle = brl_tdb_create_handle,
695 .brl_lock = brl_tdb_lock,
696 .brl_unlock = brl_tdb_unlock,
697 .brl_remove_pending = brl_tdb_remove_pending,
698 .brl_locktest = brl_tdb_locktest,
699 .brl_close = brl_tdb_close
703 void brl_tdb_init_ops(void)
705 brl_set_ops(&brlock_tdb_ops);