s3: Cache brlock.tdb entries for the fast read&write strict locking code path
author Volker Lendecke <vl@samba.org>
Mon, 16 Nov 2009 08:40:47 +0000 (09:40 +0100)
committer Volker Lendecke <vl@samba.org>
Sat, 21 Nov 2009 10:40:13 +0000 (11:40 +0100)
For a netbench run this saves around 2% of user-space CPU; fetching a 100MB file
takes around 4% less time.
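
The idea in isolation: when brlock.tdb is opened with TDB_SEQNUM (the
non-clustered case in the patch), every write to the database bumps a sequence
number that a reader can poll without fetching the record itself. A reader can
therefore keep a private copy of a record together with the seqnum it observed
at fetch time and reuse that copy until the seqnum moves. The following is a
minimal sketch of that pattern against the plain tdb API rather than the dbwrap
wrapper used in the patch; struct cached_rec and cached_fetch() are illustrative
names only, not part of the commit.

#include <stdlib.h>
#include <tdb.h>

struct cached_rec {
	int seqnum;        /* tdb seqnum observed at fetch time */
	TDB_DATA data;     /* private copy of the record, owned by the cache */
};

/*
 * Return the cached copy if the database seqnum has not moved since the
 * last fetch; otherwise drop the stale copy and fetch afresh. The tdb
 * must have been opened with TDB_SEQNUM for the seqnum to be maintained.
 */
static TDB_DATA cached_fetch(struct tdb_context *tdb, TDB_DATA key,
			     struct cached_rec *cache)
{
	int seqnum = tdb_get_seqnum(tdb);

	if ((cache->data.dptr != NULL) && (cache->seqnum == seqnum)) {
		/* Fast path: nothing changed since the last fetch. */
		return cache->data;
	}

	/* Stale or empty cache: drop the old copy and refetch. */
	free(cache->data.dptr);
	cache->data = tdb_fetch(tdb, key);
	cache->seqnum = seqnum;

	return cache->data;
}

In the patch itself the cached record is talloc-parented to the fsp, so callers
of brl_get_locks_readonly() must no longer free the returned pointer; this is
why the TALLOC_FREE(br_lck) calls disappear from strict_lock_default() and
query_lock() below.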

source3/include/proto.h
source3/include/smb.h
source3/locking/brlock.c
source3/locking/locking.c

index cad865197444fee9230bd0ec37d1de40e5f5113c..feeac590040ac543804490a045f698430bb37eea 100644
@@ -3428,8 +3428,7 @@ int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
               void *private_data);
 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
                                        files_struct *fsp);
-struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx,
-                                       files_struct *fsp);
+struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp);
 void brl_register_msgs(struct messaging_context *msg_ctx);
 
 /* The following definitions come from locking/locking.c  */
index 2ff60f6c44e0c65518428417429ce4d1ce16e53f..a3acb7c415da2055d1409b63944b92756b39dce2 100644
@@ -461,6 +461,14 @@ typedef struct files_struct {
 
        struct files_struct *base_fsp; /* placeholder for delete on close */
 
+       /*
+        * Read-only cached brlock record, thrown away when the
+        * brlock.tdb seqnum changes. This avoids fetching data from
+        * the brlock.tdb on every read/write call.
+        */
+       int brlock_seqnum;
+       struct byte_range_lock *brlock_rec;
+
        struct dptr_struct *dptr;
 } files_struct;
 
index c72fad7f2e32f23d4dfc6c6791a83f0fa0ded5c5..d3f5e61f7d1c5a06f5b1874a7613b9d1c7c09a18 100644
@@ -264,12 +264,25 @@ NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool
 
 void brl_init(bool read_only)
 {
+       int tdb_flags;
+
        if (brlock_db) {
                return;
        }
+
+       tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST;
+
+       if (!lp_clustering()) {
+               /*
+                * We can't use the SEQNUM trick to cache brlock
+                * entries in the clustering case because ctdb seqnum
+                * propagation has a delay.
+                */
+               tdb_flags |= TDB_SEQNUM;
+       }
+
        brlock_db = db_open(NULL, lock_path("brlock.tdb"),
-                           lp_open_files_db_hash_size(),
-                           TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST,
+                           lp_open_files_db_hash_size(), tdb_flags,
                            read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
        if (!brlock_db) {
                DEBUG(0,("Failed to open byte range locking database %s\n",
@@ -1890,10 +1903,49 @@ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
        return brl_get_locks_internal(mem_ctx, fsp, False);
 }
 
-struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx,
-                                       files_struct *fsp)
+struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
 {
-       return brl_get_locks_internal(mem_ctx, fsp, True);
+       struct byte_range_lock *br_lock;
+
+       if (lp_clustering()) {
+               return brl_get_locks_internal(talloc_tos(), fsp, true);
+       }
+
+       if ((fsp->brlock_rec != NULL)
+           && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
+               return fsp->brlock_rec;
+       }
+
+       TALLOC_FREE(fsp->brlock_rec);
+
+       br_lock = brl_get_locks_internal(talloc_tos(), fsp, false);
+       if (br_lock == NULL) {
+               return NULL;
+       }
+       fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
+
+       fsp->brlock_rec = talloc_zero(fsp, struct byte_range_lock);
+       if (fsp->brlock_rec == NULL) {
+               goto fail;
+       }
+       fsp->brlock_rec->fsp = fsp;
+       fsp->brlock_rec->num_locks = br_lock->num_locks;
+       fsp->brlock_rec->read_only = true;
+       fsp->brlock_rec->key = br_lock->key;
+
+       fsp->brlock_rec->lock_data = (struct lock_struct *)
+               talloc_memdup(fsp->brlock_rec, br_lock->lock_data,
+                             sizeof(struct lock_struct) * br_lock->num_locks);
+       if (fsp->brlock_rec->lock_data == NULL) {
+               goto fail;
+       }
+
+       TALLOC_FREE(br_lock);
+       return fsp->brlock_rec;
+fail:
+       TALLOC_FREE(br_lock);
+       TALLOC_FREE(fsp->brlock_rec);
+       return NULL;
 }
 
 struct brl_revalidate_state {
index cf787d4fac90d51921fb536f34e507de53ef1502..5a6fdf081ebf09571fc2eae945947f5501f321cb 100644
@@ -116,7 +116,9 @@ bool strict_lock_default(files_struct *fsp, struct lock_struct *plock)
                        DEBUG(10,("is_locked: optimisation - level II oplock on file %s\n", fsp_str_dbg(fsp)));
                        ret = True;
                } else {
-                       struct byte_range_lock *br_lck = brl_get_locks_readonly(talloc_tos(), fsp);
+                       struct byte_range_lock *br_lck;
+
+                       br_lck = brl_get_locks_readonly(fsp);
                        if (!br_lck) {
                                return True;
                        }
@@ -127,10 +129,11 @@ bool strict_lock_default(files_struct *fsp, struct lock_struct *plock)
                                        plock->size,
                                        plock->lock_type,
                                        plock->lock_flav);
-                       TALLOC_FREE(br_lck);
                }
        } else {
-               struct byte_range_lock *br_lck = brl_get_locks_readonly(talloc_tos(), fsp);
+               struct byte_range_lock *br_lck;
+
+               br_lck = brl_get_locks_readonly(fsp);
                if (!br_lck) {
                        return True;
                }
@@ -141,7 +144,6 @@ bool strict_lock_default(files_struct *fsp, struct lock_struct *plock)
                                plock->size,
                                plock->lock_type,
                                plock->lock_flav);
-               TALLOC_FREE(br_lck);
        }
 
        DEBUG(10,("strict_lock_default: flavour = %s brl start=%.0f "
@@ -170,7 +172,6 @@ NTSTATUS query_lock(files_struct *fsp,
                        enum brl_flavour lock_flav)
 {
        struct byte_range_lock *br_lck = NULL;
-       NTSTATUS status = NT_STATUS_LOCK_NOT_GRANTED;
 
        if (!fsp->can_lock) {
                return fsp->is_directory ? NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE;
@@ -180,21 +181,18 @@ NTSTATUS query_lock(files_struct *fsp,
                return NT_STATUS_OK;
        }
 
-       br_lck = brl_get_locks_readonly(talloc_tos(), fsp);
+       br_lck = brl_get_locks_readonly(fsp);
        if (!br_lck) {
                return NT_STATUS_NO_MEMORY;
        }
 
-       status = brl_lockquery(br_lck,
+       return brl_lockquery(br_lck,
                        psmbpid,
                        procid_self(),
                        poffset,
                        pcount,
                        plock_type,
                        lock_flav);
-
-       TALLOC_FREE(br_lck);
-       return status;
 }
 
 static void increment_current_lock_count(files_struct *fsp,