2 Unix SMB/CIFS implementation.
4 generic byte range locking code
6 Copyright (C) Andrew Tridgell 1992-2004
7 Copyright (C) Jeremy Allison 1992-2000
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 /* This module implements a tdb based byte range locking service,
25 replacing the fcntl() based byte range locking previously
26 used. This allows us to provide the same semantics as NT */
29 #include "system/filesys.h"
30 #include "lib/tdb/include/tdb.h"
34 in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
35 a file. For a local posix filesystem this will usually be a combination
36 of the device and inode numbers of the file, but it can be anything
37 that uniquely identifies a file for locking purposes, as long
38 as it is applied consistently.
42 the lock context contains the elements that define whether one
43 lock is the same as another lock
51 /* The data in brlock records is an unsorted linear array of these
52 records. It is unnecessary to store the count as tdb provides the
55 struct lock_context context;
59 enum brl_type lock_type;
67 struct messaging_context *messaging_ctx;
68 struct lock_struct last_lock;
73 Open up the brlock.tdb database. Close it down using
74 talloc_free(). We need the messaging_ctx to allow for
75 pending lock notifications.
77 struct brl_context *brl_init(TALLOC_CTX *mem_ctx, uint32_t server, uint16_t tid,
78 struct messaging_context *messaging_ctx)
81 struct brl_context *brl;
83 brl = talloc(mem_ctx, struct brl_context);
88 path = smbd_tmp_path(brl, "brlock.tdb");
89 brl->w = tdb_wrap_open(brl, path, 0,
90 TDB_DEFAULT, O_RDWR|O_CREAT, 0600);
99 brl->messaging_ctx = messaging_ctx;
100 ZERO_STRUCT(brl->last_lock);
107 see if two locking contexts are equal
109 static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
111 return (ctx1->server == ctx2->server &&
112 ctx1->smbpid == ctx2->smbpid &&
113 ctx1->tid == ctx2->tid);
117 see if lck1 and lck2 overlap
119 static BOOL brl_overlap(struct lock_struct *lck1,
120 struct lock_struct *lck2)
122 /* this extra check is not redundent - it copes with locks
123 that go beyond the end of 64 bit file space */
124 if (lck1->size != 0 &&
125 lck1->start == lck2->start &&
126 lck1->size == lck2->size) {
130 if (lck1->start >= (lck2->start+lck2->size) ||
131 lck2->start >= (lck1->start+lck1->size)) {
138 See if lock2 can be added when lock1 is in place.
140 static BOOL brl_conflict(struct lock_struct *lck1,
141 struct lock_struct *lck2)
143 /* pending locks don't conflict with anything */
144 if (lck1->lock_type >= PENDING_READ_LOCK ||
145 lck2->lock_type >= PENDING_READ_LOCK) {
149 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
153 if (brl_same_context(&lck1->context, &lck2->context) &&
154 lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
158 return brl_overlap(lck1, lck2);
163 Check to see if this lock conflicts, but ignore our own locks on the
166 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
168 /* pending locks don't conflict with anything */
169 if (lck1->lock_type >= PENDING_READ_LOCK ||
170 lck2->lock_type >= PENDING_READ_LOCK) {
174 if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
178 * note that incoming write calls conflict with existing READ
179 * locks even if the context is the same. JRA. See LOCKTEST7
182 if (brl_same_context(&lck1->context, &lck2->context) &&
183 lck1->fnum == lck2->fnum &&
184 (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
188 return brl_overlap(lck1, lck2);
193 amazingly enough, w2k3 "remembers" whether the last lock failure
194 is the same as this one and changes its error code. I wonder if any
197 static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
199 if (lock->context.server == brl->last_lock.context.server &&
200 lock->context.tid == brl->last_lock.context.tid &&
201 lock->fnum == brl->last_lock.fnum &&
202 lock->start == brl->last_lock.start &&
203 lock->size == brl->last_lock.size) {
204 return NT_STATUS_FILE_LOCK_CONFLICT;
206 brl->last_lock = *lock;
207 if (lock->start >= 0xEF000000 &&
208 (lock->start >> 63) == 0) {
209 /* amazing the little things you learn with a test
210 suite. Locks beyond this offset (as a 64 bit
211 number!) always generate the conflict error code,
212 unless the top bit is set */
213 return NT_STATUS_FILE_LOCK_CONFLICT;
215 return NT_STATUS_LOCK_NOT_GRANTED;
219 Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
220 which case a real lock is first tried, and if that fails then a
221 pending lock is created. When the pending lock is triggered (by
222 someone else closing an overlapping lock range) a messaging
223 notification is sent, identified by the notify_ptr
225 NTSTATUS brl_lock(struct brl_context *brl,
229 uint64_t start, uint64_t size,
230 enum brl_type lock_type,
235 struct lock_struct lock, *locks=NULL;
238 kbuf.dptr = (char *)file_key->data;
239 kbuf.dsize = file_key->length;
241 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
242 return NT_STATUS_INTERNAL_DB_CORRUPTION;
245 /* if this is a pending lock, then with the chainlock held we
246 try to get the real lock. If we succeed then we don't need
247 to make it pending. This prevents a possible race condition
248 where the pending lock gets created after the lock that is
249 preventing the real lock gets removed */
250 if (lock_type >= PENDING_READ_LOCK) {
251 enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
252 status = brl_lock(brl, file_key, smbpid, fnum, start, size, rw, NULL);
253 if (NT_STATUS_IS_OK(status)) {
254 tdb_chainunlock(brl->w->tdb, kbuf);
259 dbuf = tdb_fetch(brl->w->tdb, kbuf);
261 lock.context.smbpid = smbpid;
262 lock.context.server = brl->server;
263 lock.context.tid = brl->tid;
267 lock.lock_type = lock_type;
268 lock.notify_ptr = notify_ptr;
271 /* there are existing locks - make sure they don't conflict */
272 locks = (struct lock_struct *)dbuf.dptr;
273 count = dbuf.dsize / sizeof(*locks);
274 for (i=0; i<count; i++) {
275 if (brl_conflict(&locks[i], &lock)) {
276 status = brl_lock_failed(brl, &lock);
282 /* no conflicts - add it to the list of locks */
283 locks = realloc_p(locks, struct lock_struct, count+1);
285 status = NT_STATUS_NO_MEMORY;
288 dbuf.dptr = (char *)locks;
291 dbuf.dsize += sizeof(lock);
293 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
294 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
299 tdb_chainunlock(brl->w->tdb, kbuf);
301 /* the caller needs to know if the real lock was granted. If
302 we have reached here then it must be a pending lock that
303 was granted, so tell them the lock failed */
304 if (lock_type >= PENDING_READ_LOCK) {
305 return brl_lock_failed(brl, &lock);
313 tdb_chainunlock(brl->w->tdb, kbuf);
319 we are removing a lock that might be holding up a pending lock. Scan for pending
320 locks that cover this range and if we find any then notify the server that it should
323 static void brl_notify_unlock(struct brl_context *brl,
324 struct lock_struct *locks, int count,
325 struct lock_struct *removed_lock)
329 /* the last_notice logic is to prevent stampeding on a lock
330 range. It prevents us sending hundreds of notifies on the
331 same range of bytes. It doesn't prevent all possible
332 stampedes, but it does prevent the most common problem */
335 for (i=0;i<count;i++) {
336 if (locks[i].lock_type >= PENDING_READ_LOCK &&
337 brl_overlap(&locks[i], removed_lock)) {
338 if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
341 if (locks[i].lock_type == PENDING_WRITE_LOCK) {
344 messaging_send_ptr(brl->messaging_ctx, locks[i].context.server,
345 MSG_BRL_RETRY, locks[i].notify_ptr);
352 send notifications for all pending locks - the file is being closed by this
355 static void brl_notify_all(struct brl_context *brl,
356 struct lock_struct *locks, int count)
359 for (i=0;i<count;i++) {
360 if (locks->lock_type >= PENDING_READ_LOCK) {
361 brl_notify_unlock(brl, locks, count, &locks[i]);
369 Unlock a range of bytes.
371 NTSTATUS brl_unlock(struct brl_context *brl,
375 uint64_t start, uint64_t size)
379 struct lock_struct *locks;
380 struct lock_context context;
383 kbuf.dptr = (char *)file_key->data;
384 kbuf.dsize = file_key->length;
386 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
387 return NT_STATUS_INTERNAL_DB_CORRUPTION;
390 dbuf = tdb_fetch(brl->w->tdb, kbuf);
392 tdb_chainunlock(brl->w->tdb, kbuf);
393 return NT_STATUS_RANGE_NOT_LOCKED;
396 context.smbpid = smbpid;
397 context.server = brl->server;
398 context.tid = brl->tid;
400 /* there are existing locks - find a match */
401 locks = (struct lock_struct *)dbuf.dptr;
402 count = dbuf.dsize / sizeof(*locks);
404 for (i=0; i<count; i++) {
405 struct lock_struct *lock = &locks[i];
407 if (brl_same_context(&lock->context, &context) &&
408 lock->fnum == fnum &&
409 lock->start == start &&
410 lock->size == size &&
411 lock->notify_ptr == NULL) {
412 /* found it - delete it */
414 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
415 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
419 struct lock_struct removed_lock = *lock;
421 memmove(&locks[i], &locks[i+1],
422 sizeof(*locks)*((count-1) - i));
426 /* send notifications for any relevant pending locks */
427 brl_notify_unlock(brl, locks, count, &removed_lock);
429 dbuf.dsize = count * sizeof(*locks);
431 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
432 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
438 tdb_chainunlock(brl->w->tdb, kbuf);
443 /* we didn't find it */
444 status = NT_STATUS_RANGE_NOT_LOCKED;
448 tdb_chainunlock(brl->w->tdb, kbuf);
454 remove a pending lock. This is called when the caller has either
455 given up trying to establish a lock or when they have succeeded in
456 getting it. In either case they no longer need to be notified.
458 NTSTATUS brl_remove_pending(struct brl_context *brl,
464 struct lock_struct *locks;
467 kbuf.dptr = (char *)file_key->data;
468 kbuf.dsize = file_key->length;
470 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
471 return NT_STATUS_INTERNAL_DB_CORRUPTION;
474 dbuf = tdb_fetch(brl->w->tdb, kbuf);
476 tdb_chainunlock(brl->w->tdb, kbuf);
477 return NT_STATUS_RANGE_NOT_LOCKED;
480 /* there are existing locks - find a match */
481 locks = (struct lock_struct *)dbuf.dptr;
482 count = dbuf.dsize / sizeof(*locks);
484 for (i=0; i<count; i++) {
485 struct lock_struct *lock = &locks[i];
487 if (lock->notify_ptr == notify_ptr &&
488 lock->context.server == brl->server) {
489 /* found it - delete it */
491 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
492 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
497 memmove(&locks[i], &locks[i+1],
498 sizeof(*locks)*((count-1) - i));
501 dbuf.dsize = count * sizeof(*locks);
502 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
503 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
509 tdb_chainunlock(brl->w->tdb, kbuf);
514 /* we didn't find it */
515 status = NT_STATUS_RANGE_NOT_LOCKED;
519 tdb_chainunlock(brl->w->tdb, kbuf);
525 Test if we are allowed to perform IO on a region of an open file
527 NTSTATUS brl_locktest(struct brl_context *brl,
531 uint64_t start, uint64_t size,
532 enum brl_type lock_type)
536 struct lock_struct lock, *locks;
538 kbuf.dptr = (char *)file_key->data;
539 kbuf.dsize = file_key->length;
541 dbuf = tdb_fetch(brl->w->tdb, kbuf);
542 if (dbuf.dptr == NULL) {
546 lock.context.smbpid = smbpid;
547 lock.context.server = brl->server;
548 lock.context.tid = brl->tid;
552 lock.lock_type = lock_type;
554 /* there are existing locks - make sure they don't conflict */
555 locks = (struct lock_struct *)dbuf.dptr;
556 count = dbuf.dsize / sizeof(*locks);
558 for (i=0; i<count; i++) {
559 if (brl_conflict_other(&locks[i], &lock)) {
561 return NT_STATUS_FILE_LOCK_CONFLICT;
571 Remove any locks associated with a open file.
573 NTSTATUS brl_close(struct brl_context *brl,
574 DATA_BLOB *file_key, int fnum)
577 int count, i, dcount=0;
578 struct lock_struct *locks;
581 kbuf.dptr = (char *)file_key->data;
582 kbuf.dsize = file_key->length;
584 if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
585 return NT_STATUS_INTERNAL_DB_CORRUPTION;
588 dbuf = tdb_fetch(brl->w->tdb, kbuf);
590 tdb_chainunlock(brl->w->tdb, kbuf);
594 /* there are existing locks - remove any for this fnum */
595 locks = (struct lock_struct *)dbuf.dptr;
596 count = dbuf.dsize / sizeof(*locks);
598 for (i=0; i<count; i++) {
599 struct lock_struct *lock = &locks[i];
601 if (lock->context.tid == brl->tid &&
602 lock->context.server == brl->server &&
603 lock->fnum == fnum) {
604 /* found it - delete it */
605 if (count > 1 && i < count-1) {
606 memmove(&locks[i], &locks[i+1],
607 sizeof(*locks)*((count-1) - i));
615 status = NT_STATUS_OK;
618 if (tdb_delete(brl->w->tdb, kbuf) != 0) {
619 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
621 } else if (dcount != 0) {
622 /* tell all pending lock holders for this file that
623 they have a chance now. This is a bit indiscriminant,
625 brl_notify_all(brl, locks, count);
627 dbuf.dsize = count * sizeof(*locks);
629 if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
630 status = NT_STATUS_INTERNAL_DB_CORRUPTION;
635 tdb_chainunlock(brl->w->tdb, kbuf);