2 Unix SMB/CIFS implementation.
4 generic byte range locking code
6 Copyright (C) Andrew Tridgell 1992-2004
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 /* This module implements a tdb based byte range locking service,
25 replacing the fcntl() based byte range locking previously
26 used. This allows us to provide the same semantics as NT */
31 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
32 a file. For a local posix filesystem this will usually be a combination
33 of the device and inode numbers of the file, but it can be anything
34 that uniquely identifies a file for locking purposes, as long
35 as it is applied consistently.
39 the lock context contains the elements that define whether one
40 lock is the same as another lock
48 /* The data in brlock records is an unsorted linear array of these
49    records. It is unnecessary to store the count as tdb provides the
/* NOTE(review): extraction gap — the end of this comment and the opening
   of "struct lock_struct" are missing from this listing. The visible
   fields below belong to struct lock_struct. */
52 	struct lock_context context;
/* lock_type is ordered so that values >= PENDING_READ_LOCK denote
   pending locks — see brl_conflict() and brl_notify_unlock() */
56 	enum brl_type lock_type;
/* NOTE(review): gap in listing — the field below belongs to a different
   struct (struct brl_context); it caches the most recent failed lock so
   brl_lock_failed() can reproduce w2k3's error-code behavior. */
65 	struct lock_struct last_lock_failure;
/* NOTE(review): sampled listing — the declaration of `path`, the NULL
   checks on talloc_p()/tdb_wrap_open(), the server/tid assignments and
   the trailing `return brl;` fall in the gaps and are not shown. */
70 Open up the brlock.tdb database. Close it down using
71 talloc_free(). We need the messaging_ctx to allow for
72 pending lock notifications.
74 void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid,
78 	struct brl_context *brl;
/* allocate the context as a talloc child of mem_ctx, so a single
   talloc_free() on the returned pointer tears everything down */
80 	brl = talloc_p(mem_ctx, struct brl_context);
85 	path = lock_path(brl, "brlock.tdb");
/* TDB_CLEAR_IF_FIRST: brlock.tdb holds only runtime lock state, so the
   first opener wipes any records left over from a previous run */
86 	brl->w = tdb_wrap_open(brl, path, 0,
87 			       TDB_DEFAULT|TDB_CLEAR_IF_FIRST,
88 			       O_RDWR|O_CREAT, 0600);
97 	brl->messaging_ctx = messaging_ctx;
/* start with no remembered lock failure — see brl_lock_failed() */
98 	ZERO_STRUCT(brl->last_lock_failure);
105 see if two locking contexts are equal
107 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
109 return (ctx1->server == ctx2->server &&
110 ctx1->smbpid == ctx2->smbpid &&
111 ctx1->tid == ctx2->tid);
115 see if lck1 and lck2 overlap
117 static BOOL brl_overlap(struct lock_struct *lck1,
118 struct lock_struct *lck2)
120 if (lck1->start >= (lck2->start + lck2->size) ||
121 lck2->start >= (lck1->start + lck1->size)) {
128 See if lock2 can be added when lock1 is in place.
130 static BOOL brl_conflict(struct lock_struct *lck1,
131 struct lock_struct *lck2)
133 /* pending locks don't conflict with anything */
134 if (lck1->lock_type >= PENDING_READ_LOCK ||
135 lck2->lock_type >= PENDING_READ_LOCK) {
139 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
143 if (brl_same_context(&lck1->context, &lck2->context) &&
144 lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
148 return brl_overlap(lck1, lck2);
153 Check to see if this lock conflicts, but ignore our own locks on the
156 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
158 /* pending locks don't conflict with anything */
159 if (lck1->lock_type >= PENDING_READ_LOCK ||
160 lck2->lock_type >= PENDING_READ_LOCK) {
164 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
168 * note that incoming write calls conflict with existing READ
169 * locks even if the context is the same. JRA. See LOCKTEST7
172 if (brl_same_context(&lck1->context, &lck2->context) &&
173 lck1->fnum == lck2->fnum &&
174 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
178 return brl_overlap(lck1, lck2);
183 amazingly enough, w2k3 "remembers" whether the last lock failure
184 is the same as this one and changes its error code. I wonder if any
187 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
189 if (brl_same_context(&lock->context, &brl->last_lock_failure.context) &&
190 lock->fnum == brl->last_lock_failure.fnum &&
191 lock->start == brl->last_lock_failure.start &&
192 lock->size == brl->last_lock_failure.size) {
193 return NT_STATUS_FILE_LOCK_CONFLICT;
195 brl->last_lock_failure = *lock;
196 if (lock->start >= 0xEF000000) {
197 /* amazing the little things you learn with a test
198 suite. Locks beyond this offset (as a 64 bit
199 number!) always generate the conflict error
201 return NT_STATUS_FILE_LOCK_CONFLICT;
203 return NT_STATUS_LOCK_NOT_GRANTED;
/* NOTE(review): sampled listing — the declarations of i, count, status,
   kbuf, dbuf and tp, the `fail:` error path, the NULL check on Realloc,
   the `notify_ptr` parameter line, and several closing braces fall in
   the gaps below and are not shown. Do not treat this fragment as a
   complete function body. */
207 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
208 which case a real lock is first tried, and if that fails then a
209 pending lock is created. When the pending lock is triggered (by
210 someone else closing an overlapping lock range) a messaging
211 notification is sent, identified by the notify_ptr
213 NTSTATUS brl_lock(void *brl_ctx,
217 		  uint64_t start, uint64_t size,
218 		  enum brl_type lock_type,
221 	struct brl_context *brl = brl_ctx;
224 	struct lock_struct lock, *locks;
/* the tdb key is the caller-supplied unique file key */
228 	kbuf.dptr = file_key->data;
229 	kbuf.dsize = file_key->length;
/* chainlock serialises all access to this file's lock record */
231 	if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
232 		return NT_STATUS_INTERNAL_DB_CORRUPTION;
235 	/* if this is a pending lock, then with the chainlock held we
236 	   try to get the real lock. If we succeed then we don't need
237 	   to make it pending. This prevents a possible race condition
238 	   where the pending lock gets created after the lock that is
239 	   preventing the real lock gets removed */
240 	if (lock_type >= PENDING_READ_LOCK) {
241 		enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
/* recursive call with the equivalent non-pending lock type */
242 		status = brl_lock(brl_ctx, file_key, smbpid, fnum, start, size, rw, NULL);
243 		if (NT_STATUS_IS_OK(status)) {
244 			tdb_chainunlock(brl->w->tdb, kbuf);
249 	dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* build the new lock record; the context triple identifies its owner */
251 	lock.context.smbpid = smbpid;
252 	lock.context.server = brl->server;
253 	lock.context.tid = brl->tid;
257 	lock.lock_type = lock_type;
258 	lock.notify_ptr = notify_ptr;
261 		/* there are existing locks - make sure they don't conflict */
262 		locks = (struct lock_struct *)dbuf.dptr;
263 		count = dbuf.dsize / sizeof(*locks);
264 		for (i=0; i<count; i++) {
265 			if (brl_conflict(&locks[i], &lock)) {
266 				status = brl_lock_failed(brl, &lock);
272 	/* no conflicts - add it to the list of locks */
273 	tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*locks));
/* NOTE(review): the Realloc NULL check is in a gap; only its failure
   status assignment is visible here */
275 		status = NT_STATUS_NO_MEMORY;
/* append the new record to the unsorted array */
280 	memcpy(dbuf.dptr + dbuf.dsize, &lock, sizeof(lock));
281 	dbuf.dsize += sizeof(lock);
283 	if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
284 		status = NT_STATUS_INTERNAL_DB_CORRUPTION;
289 	tdb_chainunlock(brl->w->tdb, kbuf);
291 	/* the caller needs to know if the real lock was granted. If
292 	   we have reached here then it must be a pending lock that
293 	   was granted, so tell them the lock failed */
294 	if (lock_type >= PENDING_READ_LOCK) {
295 		return brl_lock_failed(brl, &lock);
/* NOTE(review): gap — the `fail:` cleanup label presumably precedes
   this unlock; the final return is not shown */
303 	tdb_chainunlock(brl->w->tdb, kbuf);
/* NOTE(review): sampled listing — the declarations of i, last_notice
   (initialised to -1) and the DATA_BLOB `data`, the `last_notice = i`
   update, a `continue`, and closing braces fall in the gaps below. */
309 we are removing a lock that might be holding up a pending lock. Scan for pending
310 locks that cover this range and if we find any then notify the server that it should
313 static void brl_notify_unlock(struct brl_context *brl,
314 			      struct lock_struct *locks, int count,
315 			      struct lock_struct *removed_lock)
319 	/* the last_notice logic is to prevent stampeding on a lock
320 	   range. It prevents us sending hundreds of notifies on the
321 	   same range of bytes. It doesn't prevent all possible
322 	   stampedes, but it does prevent the most common problem */
325 	for (i=0;i<count;i++) {
/* a pending lock that overlaps the removed range should be retried */
326 		if (locks[i].lock_type >= PENDING_READ_LOCK &&
327 		    brl_overlap(&locks[i], removed_lock)) {
/* skip if we already notified an overlapping pending lock */
330 			if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
/* the message payload is the opaque notify_ptr registered by the waiter */
334 			data.data = (void *)&locks[i].notify_ptr;
335 			data.length = sizeof(void *);
/* wake the owning server so it can retry the lock */
336 			messaging_send(brl->messaging_ctx, locks[i].context.server, MSG_BRL_RETRY, &data);
343 send notifications for all pending locks - the file is being closed by this
346 static void brl_notify_all(struct brl_context *brl,
347 struct lock_struct *locks, int count)
350 for (i=0;i<count;i++) {
351 if (locks->lock_type >= PENDING_READ_LOCK) {
352 brl_notify_unlock(brl, locks, count, &locks[i]);
/* NOTE(review): sampled listing — the declarations of status, kbuf,
   dbuf, count and i, the smbpid/fnum parameter lines, the dbuf.dptr
   NULL check, the count==1 vs count>1 branch around tdb_delete, and
   several closing braces fall in the gaps below. */
360 Unlock a range of bytes.
362 NTSTATUS brl_unlock(void *brl_ctx,
366 		    uint64_t start, uint64_t size)
368 	struct brl_context *brl = brl_ctx;
371 	struct lock_struct *locks;
372 	struct lock_context context;
375 	kbuf.dptr = file_key->data;
376 	kbuf.dsize = file_key->length;
378 	if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
379 		return NT_STATUS_INTERNAL_DB_CORRUPTION;
382 	dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record for this file key => nothing was locked */
384 		tdb_chainunlock(brl->w->tdb, kbuf);
385 		return NT_STATUS_RANGE_NOT_LOCKED;
/* the context triple identifies who is allowed to remove the lock */
388 	context.smbpid = smbpid;
389 	context.server = brl->server;
390 	context.tid = brl->tid;
392 	/* there are existing locks - find a match */
393 	locks = (struct lock_struct *)dbuf.dptr;
394 	count = dbuf.dsize / sizeof(*locks);
396 	for (i=0; i<count; i++) {
397 		struct lock_struct *lock = &locks[i];
/* notify_ptr == NULL excludes pending locks: those are removed via
   brl_remove_pending(), never via brl_unlock() */
399 		if (brl_same_context(&lock->context, &context) &&
400 		    lock->fnum == fnum &&
401 		    lock->start == start &&
402 		    lock->size == size &&
403 		    lock->notify_ptr == NULL) {
404 			/* found it - delete it */
/* NOTE(review): gap — presumably taken when this was the only record,
   so the whole tdb entry is deleted */
406 				if (tdb_delete(brl->w->tdb, kbuf) != 0) {
407 					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
411 				struct lock_struct removed_lock = *lock;
/* shift the tail of the array down over the removed element */
413 					memmove(&locks[i], &locks[i+1],
414 						sizeof(*locks)*((count-1) - i));
418 				/* send notifications for any relevant pending locks */
419 				brl_notify_unlock(brl, locks, count, &removed_lock);
421 				dbuf.dsize = count * sizeof(*locks);
423 				if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
424 					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
430 			tdb_chainunlock(brl->w->tdb, kbuf);
435 	/* we didn't find it */
436 	status = NT_STATUS_RANGE_NOT_LOCKED;
440 	tdb_chainunlock(brl->w->tdb, kbuf);
/* NOTE(review): sampled listing — the notify_ptr parameter line, the
   declarations of status, kbuf, dbuf, count and i, the dbuf.dptr NULL
   check, the count==1 vs count>1 branch, and several closing braces
   fall in the gaps below. */
446 remove a pending lock. This is called when the caller has either
447 given up trying to establish a lock or when they have succeeded in
448 getting it. In either case they no longer need to be notified.
450 NTSTATUS brl_remove_pending(void *brl_ctx,
454 	struct brl_context *brl = brl_ctx;
457 	struct lock_struct *locks;
460 	kbuf.dptr = file_key->data;
461 	kbuf.dsize = file_key->length;
463 	if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
464 		return NT_STATUS_INTERNAL_DB_CORRUPTION;
467 	dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record at all => the pending lock cannot exist */
469 		tdb_chainunlock(brl->w->tdb, kbuf);
470 		return NT_STATUS_RANGE_NOT_LOCKED;
473 	/* there are existing locks - find a match */
474 	locks = (struct lock_struct *)dbuf.dptr;
475 	count = dbuf.dsize / sizeof(*locks);
477 	for (i=0; i<count; i++) {
478 		struct lock_struct *lock = &locks[i];
/* match on the opaque notify_ptr, restricted to locks created by this
   server instance */
480 		if (lock->notify_ptr == notify_ptr &&
481 		    lock->context.server == brl->server) {
482 			/* found it - delete it */
/* NOTE(review): gap — presumably the single-record case deleting the
   whole tdb entry */
484 				if (tdb_delete(brl->w->tdb, kbuf) != 0) {
485 					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
/* shift the tail of the array down over the removed element */
490 				memmove(&locks[i], &locks[i+1],
491 					sizeof(*locks)*((count-1) - i));
494 				dbuf.dsize = count * sizeof(*locks);
495 				if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
496 					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
502 			tdb_chainunlock(brl->w->tdb, kbuf);
507 	/* we didn't find it */
508 	status = NT_STATUS_RANGE_NOT_LOCKED;
512 	tdb_chainunlock(brl->w->tdb, kbuf);
/* NOTE(review): sampled listing — parameter lines (smbpid, fnum, the
   file key), local declarations, the early-return body of the NULL
   check, the free of dbuf.dptr and the success return fall in the gaps
   below. Unlike the mutating paths, no tdb_chainlock call is visible
   here — this appears to be a read-only check. */
518 Test if we are allowed to perform IO on a region of an open file
520 NTSTATUS brl_locktest(void *brl_ctx,
524 		      uint64_t start, uint64_t size,
525 		      enum brl_type lock_type)
527 	struct brl_context *brl = brl_ctx;
530 	struct lock_struct lock, *locks;
532 	kbuf.dptr = file_key->data;
533 	kbuf.dsize = file_key->length;
535 	dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record => no locks => IO is allowed (return value in gap) */
536 	if (dbuf.dptr == NULL) {
/* build a probe lock describing the intended IO */
540 	lock.context.smbpid = smbpid;
541 	lock.context.server = brl->server;
542 	lock.context.tid = brl->tid;
546 	lock.lock_type = lock_type;
548 	/* there are existing locks - make sure they don't conflict */
549 	locks = (struct lock_struct *)dbuf.dptr;
550 	count = dbuf.dsize / sizeof(*locks);
552 	for (i=0; i<count; i++) {
/* brl_conflict_other ignores our own non-conflicting locks */
553 		if (brl_conflict_other(&locks[i], &lock)) {
/* NOTE(review): gap — dbuf.dptr is presumably freed before this return */
555 			return NT_STATUS_FILE_LOCK_CONFLICT;
/* NOTE(review): sampled listing — the declarations of status, kbuf and
   dbuf, the dbuf.dptr NULL check body, the count--/dcount++ updates
   after the memmove, the count==0 condition guarding tdb_delete, and
   several closing braces fall in the gaps below. */
565 Remove any locks associated with a open file.
567 NTSTATUS brl_close(void *brl_ctx,
568 		   DATA_BLOB *file_key, int fnum)
570 	struct brl_context *brl = brl_ctx;
/* dcount counts how many locks we removed for this fnum */
572 	int count, i, dcount=0;
573 	struct lock_struct *locks;
576 	kbuf.dptr = file_key->data;
577 	kbuf.dsize = file_key->length;
579 	if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
580 		return NT_STATUS_INTERNAL_DB_CORRUPTION;
583 	dbuf = tdb_fetch(brl->w->tdb, kbuf);
/* no record => nothing to clean up (return value in gap) */
585 		tdb_chainunlock(brl->w->tdb, kbuf);
589 	/* there are existing locks - remove any for this fnum */
590 	locks = (struct lock_struct *)dbuf.dptr;
591 	count = dbuf.dsize / sizeof(*locks);
593 	for (i=0; i<count; i++) {
594 		struct lock_struct *lock = &locks[i];
/* a lock belongs to this open file iff tid, server and fnum all match */
596 		if (lock->context.tid == brl->tid &&
597 		    lock->context.server == brl->server &&
598 		    lock->fnum == fnum) {
599 			/* found it - delete it */
600 			if (count > 1 && i < count-1) {
601 				memmove(&locks[i], &locks[i+1],
602 					sizeof(*locks)*((count-1) - i));
610 	status = NT_STATUS_OK;
/* NOTE(review): gap — this delete is presumably guarded by count==0
   (all records removed), the store branch below by count>0 */
613 		if (tdb_delete(brl->w->tdb, kbuf) != 0) {
614 			status = NT_STATUS_INTERNAL_DB_CORRUPTION;
616 	} else if (dcount != 0) {
617 		/* tell all pending lock holders for this file that
618 		   they have a chance now. This is a bit indiscriminate,
619 
620 		brl_notify_all(brl, locks, count);
622 		dbuf.dsize = count * sizeof(*locks);
624 		if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
625 			status = NT_STATUS_INTERNAL_DB_CORRUPTION;
630 	tdb_chainunlock(brl->w->tdb, kbuf);