s3: Remove a comment from ancient times that no longer applies
[ira/wip.git] / source3 / smbd / oplock.c
index 385f998b1ce31ac46a8cc19c056c984175b0b3c8..7ce450c1fe50ebd1f21217d0cbeaa2c2b806e15a 100644 (file)
@@ -7,7 +7,7 @@
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
+#define DBGC_CLASS DBGC_LOCKING
 #include "includes.h"
-
-/* Current number of oplocks we have outstanding. */
-static int32 exclusive_oplocks_open = 0;
-static int32 level_II_oplocks_open = 0;
-BOOL global_client_failed_oplock_break = False;
-
-extern struct timeval smb_last_time;
-extern uint32 global_client_caps;
-extern int smb_read_error;
-
-static struct kernel_oplocks *koplocks;
+#include "smbd/globals.h"
 
 /****************************************************************************
  Get the number of current exclusive oplocks.
@@ -42,65 +32,23 @@ int32 get_number_of_exclusive_open_oplocks(void)
   return exclusive_oplocks_open;
 }
 
-/****************************************************************************
- Return True if an oplock message is pending.
-****************************************************************************/
-
-BOOL oplock_message_waiting(fd_set *fds)
-{
-       if (koplocks && koplocks->msg_waiting(fds))
-               return True;
-
-       return False;
-}
-
-/****************************************************************************
- Read an oplock break message from either the oplock UDP fd or the
- kernel (if kernel oplocks are supported).
-
- If timeout is zero then *fds contains the file descriptors that
- are ready to be read and acted upon. If timeout is non-zero then
- *fds contains the file descriptors to be selected on for read.
- The timeout is in milliseconds
-
-****************************************************************************/
-
-void process_kernel_oplocks(void)
+/*
+ * helper function used by the kernel oplock backends to post the break message
+ */
+void break_kernel_oplock(struct messaging_context *msg_ctx, files_struct *fsp)
 {
-       fd_set fds;
-
-       FD_ZERO(&fds);
-       smb_read_error = 0;
-
-       /*
-        * We need to check for kernel oplocks before going into the select
-        * here, as the EINTR generated by the linux kernel oplock may have
-        * already been eaten. JRA.
-        */
-
-       if (!koplocks) {
-               return;
-       }
+       uint8_t msg[MSG_SMB_KERNEL_BREAK_SIZE];
 
-       while (koplocks->msg_waiting(&fds)) { 
-               files_struct *fsp;
-               struct kernel_oplock_message msg;
+       /* Put the kernel break info into the message. */
+       push_file_id_24((char *)msg, &fsp->file_id);
+       SIVAL(msg,24,fsp->fh->gen_id);
 
-               fsp = koplocks->receive_message(&fds);
+       /* Don't need to be root here as we're only ever
+          sending to ourselves. */
 
-               if (fsp == NULL) {
-                       DEBUG(3, ("Kernel oplock message announced, but none "
-                                 "received\n"));
-                       return;
-               }
-
-               msg.dev = fsp->dev;
-               msg.inode = fsp->inode;
-               msg.file_id = fsp->file_id;
-               message_send_pid(pid_to_procid(sys_getpid()),
-                                MSG_SMB_KERNEL_BREAK,
-                                &msg, sizeof(msg), True);
-       }
+       messaging_send_buf(msg_ctx, procid_self(),
+                          MSG_SMB_KERNEL_BREAK,
+                          msg, MSG_SMB_KERNEL_BREAK_SIZE);
 }
 
 /****************************************************************************
@@ -108,22 +56,34 @@ void process_kernel_oplocks(void)
  disabled (just sets flags). Returns True if oplock set.
 ****************************************************************************/
 
-BOOL set_file_oplock(files_struct *fsp, int oplock_type)
+bool set_file_oplock(files_struct *fsp, int oplock_type)
 {
-       if (koplocks && !koplocks->set_oplock(fsp, oplock_type))
+       if ((fsp->oplock_type == LEVEL_II_OPLOCK)
+           && koplocks && !(koplocks->flags & KOPLOCKS_LEVEL2_SUPPORTED)) {
+               DEBUG(10, ("Refusing level2 oplock, kernel oplocks don't "
+                          "support them\n"));
+               return false;
+       }
+       if ((fsp->oplock_type != NO_OPLOCK) &&
+           (fsp->oplock_type != FAKE_LEVEL_II_OPLOCK) &&
+           koplocks &&
+           !koplocks->ops->set_oplock(koplocks, fsp, oplock_type)) {
                return False;
+       }
 
        fsp->oplock_type = oplock_type;
        fsp->sent_oplock_break = NO_BREAK_SENT;
-       if (oplock_type == LEVEL_II_OPLOCK)
+       if (oplock_type == LEVEL_II_OPLOCK) {
                level_II_oplocks_open++;
-       else
+       } else if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
                exclusive_oplocks_open++;
+       }
 
-       DEBUG(5,("set_file_oplock: granted oplock on file %s, dev = %x, inode = %.0f, file_id = %lu, \
-tv_sec = %x, tv_usec = %x\n",
-                fsp->fsp_name, (unsigned int)fsp->dev, (double)fsp->inode, fsp->file_id,
-                (int)fsp->open_time.tv_sec, (int)fsp->open_time.tv_usec ));
+       DEBUG(5,("set_file_oplock: granted oplock on file %s, %s/%lu, "
+                   "tv_sec = %x, tv_usec = %x\n",
+                fsp_str_dbg(fsp), file_id_string_tos(&fsp->file_id),
+                fsp->fh->gen_id, (int)fsp->open_time.tv_sec,
+                (int)fsp->open_time.tv_usec ));
 
        return True;
 }
@@ -137,21 +97,29 @@ void release_file_oplock(files_struct *fsp)
        if ((fsp->oplock_type != NO_OPLOCK) &&
            (fsp->oplock_type != FAKE_LEVEL_II_OPLOCK) &&
            koplocks) {
-               koplocks->release_oplock(fsp);
+               koplocks->ops->release_oplock(koplocks, fsp, NO_OPLOCK);
        }
 
-       if (fsp->oplock_type == LEVEL_II_OPLOCK)
+       if (fsp->oplock_type == LEVEL_II_OPLOCK) {
                level_II_oplocks_open--;
-       else if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type))
+       } else if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
                exclusive_oplocks_open--;
+       }
 
        SMB_ASSERT(exclusive_oplocks_open>=0);
        SMB_ASSERT(level_II_oplocks_open>=0);
-       
-       fsp->oplock_type = NO_OPLOCK;
+
+       if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
+               /* This doesn't matter for close. */
+               fsp->oplock_type = FAKE_LEVEL_II_OPLOCK;
+       } else {
+               fsp->oplock_type = NO_OPLOCK;
+       }
        fsp->sent_oplock_break = NO_BREAK_SENT;
-       
+
        flush_write_cache(fsp, OPLOCK_RELEASE_FLUSH);
+
+       TALLOC_FREE(fsp->oplock_timeout);
 }
 
 /****************************************************************************
@@ -160,8 +128,14 @@ void release_file_oplock(files_struct *fsp)
 
 static void downgrade_file_oplock(files_struct *fsp)
 {
-       if (koplocks)
-               koplocks->release_oplock(fsp);
+       if (!EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
+               DEBUG(0, ("trying to downgrade an already-downgraded oplock!\n"));
+               return;
+       }
+
+       if (koplocks) {
+               koplocks->ops->release_oplock(koplocks, fsp, LEVEL_II_OPLOCK);
+       }
        fsp->oplock_type = LEVEL_II_OPLOCK;
        exclusive_oplocks_open--;
        level_II_oplocks_open++;
@@ -174,77 +148,66 @@ static void downgrade_file_oplock(files_struct *fsp)
  to none even if a "break-to-level II" was sent.
 ****************************************************************************/
 
-BOOL remove_oplock(files_struct *fsp)
+bool remove_oplock(files_struct *fsp)
 {
-       SMB_DEV_T dev = fsp->dev;
-       SMB_INO_T inode = fsp->inode;
-       BOOL ret;
+       bool ret;
        struct share_mode_lock *lck;
 
        /* Remove the oplock flag from the sharemode. */
-       lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL);
+       lck = get_share_mode_lock(talloc_tos(), fsp->file_id, NULL, NULL,
+                                 NULL);
        if (lck == NULL) {
                DEBUG(0,("remove_oplock: failed to lock share entry for "
-                        "file %s\n", fsp->fsp_name ));
+                        "file %s\n", fsp_str_dbg(fsp)));
                return False;
        }
        ret = remove_share_oplock(lck, fsp);
        if (!ret) {
                DEBUG(0,("remove_oplock: failed to remove share oplock for "
-                        "file %s fnum %d, dev = %x, inode = %.0f\n",
-                        fsp->fsp_name, fsp->fnum, (unsigned int)dev,
-                        (double)inode));
+                        "file %s fnum %d, %s\n",
+                        fsp_str_dbg(fsp), fsp->fnum,
+                        file_id_string_tos(&fsp->file_id)));
        }
        release_file_oplock(fsp);
-       talloc_free(lck);
+       TALLOC_FREE(lck);
        return ret;
 }
 
 /*
  * Deal with a reply when a break-to-level II was sent.
  */
-BOOL downgrade_oplock(files_struct *fsp)
+bool downgrade_oplock(files_struct *fsp)
 {
-       SMB_DEV_T dev = fsp->dev;
-       SMB_INO_T inode = fsp->inode;
-       BOOL ret;
+       bool ret;
        struct share_mode_lock *lck;
 
-       lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL);
+       lck = get_share_mode_lock(talloc_tos(), fsp->file_id, NULL, NULL,
+                                 NULL);
        if (lck == NULL) {
                DEBUG(0,("downgrade_oplock: failed to lock share entry for "
-                        "file %s\n", fsp->fsp_name ));
+                        "file %s\n", fsp_str_dbg(fsp)));
                return False;
        }
        ret = downgrade_share_oplock(lck, fsp);
        if (!ret) {
                DEBUG(0,("downgrade_oplock: failed to downgrade share oplock "
-                        "for file %s fnum %d, dev = %x, inode = %.0f\n",
-                        fsp->fsp_name, fsp->fnum, (unsigned int)dev,
-                        (double)inode));
+                        "for file %s fnum %d, file_id %s\n",
+                        fsp_str_dbg(fsp), fsp->fnum,
+                        file_id_string_tos(&fsp->file_id)));
        }
-               
+
        downgrade_file_oplock(fsp);
-       talloc_free(lck);
+       TALLOC_FREE(lck);
        return ret;
 }
 
-/****************************************************************************
- Setup the listening set of file descriptors for an oplock break
- message either from the UDP socket or from the kernel. Returns the maximum
- fd used.
-****************************************************************************/
-
-int setup_oplock_select_set( fd_set *fds)
+/*
+ * Some kernel oplock implementations handle the notification themselves.
+ */
+bool should_notify_deferred_opens()
 {
-       int maxfd = 0;
-
-       if (koplocks && koplocks->notification_fd != -1) {
-               FD_SET(koplocks->notification_fd, fds);
-               maxfd = MAX(maxfd, koplocks->notification_fd);
-       }
-
-       return maxfd;
+       return !(koplocks &&
+               (koplocks->flags & KOPLOCKS_DEFERRED_OPEN_NOTIFICATION));
 }
 
 /****************************************************************************
@@ -252,7 +215,7 @@ int setup_oplock_select_set( fd_set *fds)
 ****************************************************************************/
 
 static char *new_break_smb_message(TALLOC_CTX *mem_ctx,
-                                  files_struct *fsp, uint8_t cmd)
+                                  files_struct *fsp, uint8 cmd)
 {
        char *result = TALLOC_ARRAY(mem_ctx, char, smb_size + 8*2 + 0);
 
@@ -262,7 +225,7 @@ static char *new_break_smb_message(TALLOC_CTX *mem_ctx,
        }
 
        memset(result,'\0',smb_size);
-       set_message(result,8,0,True);
+       srv_set_message(result,8,0,true);
        SCVAL(result,smb_com,SMBlockingX);
        SSVAL(result,smb_tid,fsp->conn->cnum);
        SSVAL(result,smb_pid,0xFFFF);
@@ -281,20 +244,10 @@ static char *new_break_smb_message(TALLOC_CTX *mem_ctx,
 
 static void wait_before_sending_break(void)
 {
-       struct timeval cur_tv;
-       long wait_left = (long)lp_oplock_break_wait_time();
-
-       if (wait_left == 0)
-               return;
-
-       GetTimeOfDay(&cur_tv);
-
-       wait_left -= ((cur_tv.tv_sec - smb_last_time.tv_sec)*1000) +
-                ((cur_tv.tv_usec - smb_last_time.tv_usec)/1000);
+       long wait_time = (long)lp_oplock_break_wait_time();
 
-       if(wait_left > 0) {
-               wait_left = MIN(wait_left, 1000);
-               sys_usleep(wait_left * 1000);
+       if (wait_time) {
+               smb_msleep(wait_time);
        }
 }
 
@@ -302,13 +255,13 @@ static void wait_before_sending_break(void)
  Ensure that we have a valid oplock.
 ****************************************************************************/
 
-static files_struct *initial_break_processing(SMB_DEV_T dev, SMB_INO_T inode, unsigned long file_id)
+static files_struct *initial_break_processing(struct file_id id, unsigned long file_id)
 {
        files_struct *fsp = NULL;
 
        if( DEBUGLVL( 3 ) ) {
-               dbgtext( "initial_break_processing: called for dev = %x, inode = %.0f file_id = %lu\n",
-                       (unsigned int)dev, (double)inode, file_id);
+               dbgtext( "initial_break_processing: called for %s/%u\n",
+                        file_id_string_tos(&id), (int)file_id);
                dbgtext( "Current oplocks_open (exclusive = %d, levelII = %d)\n",
                        exclusive_oplocks_open, level_II_oplocks_open );
        }
@@ -319,14 +272,13 @@ static files_struct *initial_break_processing(SMB_DEV_T dev, SMB_INO_T inode, un
         * we have an oplock on it.
         */
 
-       fsp = file_find_dif(dev, inode, file_id);
+       fsp = file_find_dif(id, file_id);
 
        if(fsp == NULL) {
                /* The file could have been closed in the meantime - return success. */
                if( DEBUGLVL( 3 ) ) {
                        dbgtext( "initial_break_processing: cannot find open file with " );
-                       dbgtext( "dev = %x, inode = %.0f file_id = %lu", (unsigned int)dev,
-                               (double)inode, file_id);
+                       dbgtext( "file_id %s gen_id = %lu", file_id_string_tos(&id), file_id);
                        dbgtext( "allowing break to succeed.\n" );
                }
                return NULL;
@@ -344,9 +296,10 @@ static files_struct *initial_break_processing(SMB_DEV_T dev, SMB_INO_T inode, un
 
        if(fsp->oplock_type == NO_OPLOCK) {
                if( DEBUGLVL( 3 ) ) {
-                       dbgtext( "initial_break_processing: file %s ", fsp->fsp_name );
-                       dbgtext( "(dev = %x, inode = %.0f, file_id = %lu) has no oplock.\n",
-                               (unsigned int)dev, (double)inode, fsp->file_id );
+                       dbgtext( "initial_break_processing: file %s ",
+                                fsp_str_dbg(fsp));
+                       dbgtext( "(file_id = %s gen_id = %lu) has no oplock.\n",
+                                file_id_string_tos(&id), fsp->fh->gen_id );
                        dbgtext( "Allowing break to succeed regardless.\n" );
                }
                return NULL;
@@ -355,78 +308,219 @@ static files_struct *initial_break_processing(SMB_DEV_T dev, SMB_INO_T inode, un
        return fsp;
 }
 
-static void oplock_timeout_handler(struct timed_event *te,
-                                  const struct timeval *now,
+static void oplock_timeout_handler(struct event_context *ctx,
+                                  struct timed_event *te,
+                                  struct timeval now,
                                   void *private_data)
 {
-       files_struct *fsp = private_data;
+       files_struct *fsp = (files_struct *)private_data;
 
-       DEBUG(0, ("Oplock break failed -- replying anyway\n"));
+       /* Remove the timed event handler. */
+       TALLOC_FREE(fsp->oplock_timeout);
+       DEBUG(0, ("Oplock break failed for file %s -- replying anyway\n",
+                 fsp_str_dbg(fsp)));
        global_client_failed_oplock_break = True;
        remove_oplock(fsp);
        reply_to_oplock_break_requests(fsp);
 }
 
-static void process_oplock_break_message(int msg_type, struct process_id src,
-                                        void *buf, size_t len)
+/*******************************************************************
+ Add a timeout handler waiting for the client reply.
+*******************************************************************/
+
+static void add_oplock_timeout_handler(files_struct *fsp)
+{
+       /*
+        * If kernel oplocks already notifies smbds when an oplock break times
+        * out, just return.
+        */
+       if (koplocks &&
+           (koplocks->flags & KOPLOCKS_TIMEOUT_NOTIFICATION)) {
+               return;
+       }
+
+       if (fsp->oplock_timeout != NULL) {
+               DEBUG(0, ("Logic problem -- have an oplock event hanging "
+                         "around\n"));
+       }
+
+       fsp->oplock_timeout =
+               event_add_timed(smbd_event_context(), NULL,
+                               timeval_current_ofs(OPLOCK_BREAK_TIMEOUT, 0),
+                               oplock_timeout_handler, fsp);
+
+       if (fsp->oplock_timeout == NULL) {
+               DEBUG(0, ("Could not add oplock timeout handler\n"));
+       }
+}
+
+void break_level2_to_none_async(files_struct *fsp)
+{
+       char *break_msg;
+
+       if (fsp->oplock_type == NO_OPLOCK) {
+               /* We already got a "break to none" message and we've handled
+                * it.  just ignore. */
+               DEBUG(3, ("process_oplock_async_level2_break_message: already "
+                         "broken to none, ignoring.\n"));
+               return;
+       }
+
+       if (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) {
+               /* Don't tell the client, just downgrade. */
+               DEBUG(3, ("process_oplock_async_level2_break_message: "
+                         "downgrading fake level 2 oplock.\n"));
+               remove_oplock(fsp);
+               return;
+       }
+
+       /* Ensure we're really at level2 state. */
+       SMB_ASSERT(fsp->oplock_type == LEVEL_II_OPLOCK);
+
+       DEBUG(10,("process_oplock_async_level2_break_message: sending break "
+                 "to none message for fid %d, file %s\n", fsp->fnum,
+                 fsp_str_dbg(fsp)));
+
+       /* Now send a break to none message to our client. */
+       break_msg = new_break_smb_message(NULL, fsp, OPLOCKLEVEL_NONE);
+       if (break_msg == NULL) {
+               exit_server("Could not talloc break_msg\n");
+       }
+
+       show_msg(break_msg);
+       if (!srv_send_smb(smbd_server_fd(),
+                       break_msg, false, 0,
+                       IS_CONN_ENCRYPTED(fsp->conn),
+                       NULL)) {
+               exit_server_cleanly("oplock_break: srv_send_smb failed.");
+       }
+
+       TALLOC_FREE(break_msg);
+
+       /* Async level2 request, don't send a reply, just remove the oplock. */
+       remove_oplock(fsp);
+
+}
+
+/*******************************************************************
+ This handles the case of a write triggering a break to none
+ message on a level2 oplock.
+ When we get this message we may be in any of three states :
+ NO_OPLOCK, LEVEL_II, FAKE_LEVEL2. We only send a message to
+ the client for LEVEL2.
+*******************************************************************/
+
+void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
+                                                     void *private_data,
+                                                     uint32_t msg_type,
+                                                     struct server_id src,
+                                                     DATA_BLOB *data)
+{
+       struct share_mode_entry msg;
+       files_struct *fsp;
+
+       if (data->data == NULL) {
+               DEBUG(0, ("Got NULL buffer\n"));
+               return;
+       }
+
+       if (data->length != MSG_SMB_SHARE_MODE_ENTRY_SIZE) {
+               DEBUG(0, ("Got invalid msg len %d\n", (int)data->length));
+               return;
+       }
+
+       /* De-linearize incoming message. */
+       message_to_share_mode_entry(&msg, (char *)data->data);
+
+       DEBUG(10, ("Got oplock async level 2 break message from pid %s: "
+                  "%s/%lu\n", procid_str(talloc_tos(), &src),
+                  file_id_string_tos(&msg.id), msg.share_file_id));
+
+       fsp = initial_break_processing(msg.id, msg.share_file_id);
+
+       if (fsp == NULL) {
+               /* We hit a race here. Break messages are sent, and before we
+                * get to process this message, we have closed the file. 
+                * No need to reply as this is an async message. */
+               DEBUG(3, ("process_oplock_async_level2_break_message: Did not find fsp, ignoring\n"));
+               return;
+       }
+
+       break_level2_to_none_async(fsp);
+}
+
+/*******************************************************************
+ This handles the generic oplock break message from another smbd.
+*******************************************************************/
+
+static void process_oplock_break_message(struct messaging_context *msg_ctx,
+                                        void *private_data,
+                                        uint32_t msg_type,
+                                        struct server_id src,
+                                        DATA_BLOB *data)
 {
-       struct share_mode_entry *msg = buf;
+       struct share_mode_entry msg;
        files_struct *fsp;
        char *break_msg;
-       BOOL break_to_level2 = False;
-       BOOL sign_state;
+       bool break_to_level2 = False;
 
-       if (buf == NULL) {
+       if (data->data == NULL) {
                DEBUG(0, ("Got NULL buffer\n"));
                return;
        }
 
-       if (len != sizeof(*msg)) {
-               DEBUG(0, ("Got invalid msg len %d\n", (int)len));
+       if (data->length != MSG_SMB_SHARE_MODE_ENTRY_SIZE) {
+               DEBUG(0, ("Got invalid msg len %d\n", (int)data->length));
                return;
        }
 
-       DEBUG(10, ("Got oplock break message from pid %d: %d/%d/%d\n",
-                  (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode,
-                  (int)msg->share_file_id));
+       /* De-linearize incoming message. */
+       message_to_share_mode_entry(&msg, (char *)data->data);
+
+       DEBUG(10, ("Got oplock break message from pid %s: %s/%lu\n",
+                  procid_str(talloc_tos(), &src), file_id_string_tos(&msg.id),
+                  msg.share_file_id));
 
-       fsp = initial_break_processing(msg->dev, msg->inode,
-                                      msg->share_file_id);
+       fsp = initial_break_processing(msg.id, msg.share_file_id);
 
        if (fsp == NULL) {
-               /* We hit race here. Break messages are sent, and before we
+               /* We hit race here. Break messages are sent, and before we
                 * get to process this message, we have closed the file. Reply
                 * with 'ok, oplock broken' */
                DEBUG(3, ("Did not find fsp\n"));
-               message_send_pid(src, MSG_SMB_BREAK_RESPONSE,
-                                msg, sizeof(*msg), True);
+
+               /* We just send the same message back. */
+               messaging_send_buf(msg_ctx, src, MSG_SMB_BREAK_RESPONSE,
+                                  (uint8 *)data->data,
+                                  MSG_SMB_SHARE_MODE_ENTRY_SIZE);
                return;
        }
 
        if (fsp->sent_oplock_break != NO_BREAK_SENT) {
                /* Remember we have to inform the requesting PID when the
                 * client replies */
-               msg->pid = src;
-               ADD_TO_ARRAY(NULL, struct share_mode_entry, *msg,
+               msg.pid = src;
+               ADD_TO_ARRAY(NULL, struct share_mode_entry, msg,
                             &fsp->pending_break_messages,
                             &fsp->num_pending_break_messages);
                return;
        }
 
-       if (EXCLUSIVE_OPLOCK_TYPE(msg->op_type) &&
+       if (EXCLUSIVE_OPLOCK_TYPE(msg.op_type) &&
            !EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type)) {
-               DEBUG(3, ("Already downgraded oplock on %.0f/%.0f: %s\n",
-                         (double)fsp->dev, (double)fsp->inode,
-                         fsp->fsp_name));
-               message_send_pid(src, MSG_SMB_BREAK_RESPONSE,
-                                msg, sizeof(*msg), True);
+               DEBUG(3, ("Already downgraded oplock on %s: %s\n",
+                         file_id_string_tos(&fsp->file_id),
+                         fsp_str_dbg(fsp)));
+               /* We just send the same message back. */
+               messaging_send_buf(msg_ctx, src, MSG_SMB_BREAK_RESPONSE,
+                                  (uint8 *)data->data,
+                                  MSG_SMB_SHARE_MODE_ENTRY_SIZE);
                return;
        }
 
-       if ((msg_type == MSG_SMB_BREAK_REQUEST) &&
-           (global_client_caps & CAP_LEVEL_II_OPLOCKS) && 
-           !koplocks && /* NOTE: we force levelII off for kernel oplocks -
-                         * this will change when it is supported */
+       if ((global_client_caps & CAP_LEVEL_II_OPLOCKS) && 
+           !(msg.op_type & FORCE_OPLOCK_BREAK_TO_NONE) &&
+           !(koplocks && !(koplocks->flags & KOPLOCKS_LEVEL2_SUPPORTED)) &&
            lp_level2_oplocks(SNUM(fsp->conn))) {
                break_to_level2 = True;
        }
@@ -437,75 +531,65 @@ static void process_oplock_break_message(int msg_type, struct process_id src,
                exit_server("Could not talloc break_msg\n");
        }
 
-       /* Need to wait before sending a break message to a file of our own */
-       if (procid_to_pid(&src) == sys_getpid()) {
+       /* Need to wait before sending a break message if we sent ourselves this message. */
+       if (procid_is_me(&src)) {
                wait_before_sending_break();
        }
 
-       /* Save the server smb signing state. */
-       sign_state = srv_oplock_set_signing(False);
-
        show_msg(break_msg);
-       if (!send_smb(smbd_server_fd(), break_msg)) {
-               exit_server("oplock_break: send_smb failed.");
+       if (!srv_send_smb(smbd_server_fd(),
+                       break_msg, false, 0,
+                       IS_CONN_ENCRYPTED(fsp->conn),
+                       NULL)) {
+               exit_server_cleanly("oplock_break: srv_send_smb failed.");
        }
 
-       /* Restore the sign state to what it was. */
-       srv_oplock_set_signing(sign_state);
+       TALLOC_FREE(break_msg);
 
-       talloc_free(break_msg);
+       fsp->sent_oplock_break = break_to_level2 ? LEVEL_II_BREAK_SENT:BREAK_TO_NONE_SENT;
 
-       if (msg_type == MSG_SMB_BREAK_REQUEST) {
-               fsp->sent_oplock_break = break_to_level2 ?
-                       LEVEL_II_BREAK_SENT:BREAK_TO_NONE_SENT;
-       } else {
-               /* Async level2 request, don't send a reply */
-               fsp->sent_oplock_break = ASYNC_LEVEL_II_BREAK_SENT;
-       }
-       msg->pid = src;
-       ADD_TO_ARRAY(NULL, struct share_mode_entry, *msg,
+       msg.pid = src;
+       ADD_TO_ARRAY(NULL, struct share_mode_entry, msg,
                     &fsp->pending_break_messages,
                     &fsp->num_pending_break_messages);
 
-       if (fsp->oplock_timeout != NULL) {
-               DEBUG(0, ("Logic problem -- have an oplock event hanging "
-                         "around\n"));
-       }
-
-       fsp->oplock_timeout =
-               add_timed_event(NULL,
-                               timeval_current_ofs(OPLOCK_BREAK_TIMEOUT, 0),
-                               "oplock_timeout_handler",
-                               oplock_timeout_handler, fsp);
-
-       if (fsp->oplock_timeout == NULL) {
-               DEBUG(0, ("Could not add oplock timeout handler\n"));
-       }
+       add_oplock_timeout_handler(fsp);
 }
 
-static void process_kernel_oplock_break(int msg_type, struct process_id src,
-                                       void *buf, size_t len)
+/*******************************************************************
+ This handles the kernel oplock break message.
+*******************************************************************/
+
+static void process_kernel_oplock_break(struct messaging_context *msg_ctx,
+                                       void *private_data,
+                                       uint32_t msg_type,
+                                       struct server_id src,
+                                       DATA_BLOB *data)
 {
-       struct kernel_oplock_message *msg = buf;
+       struct file_id id;
+       unsigned long file_id;
        files_struct *fsp;
        char *break_msg;
-       BOOL sign_state;
 
-       if (buf == NULL) {
+       if (data->data == NULL) {
                DEBUG(0, ("Got NULL buffer\n"));
                return;
        }
 
-       if (len != sizeof(*msg)) {
-               DEBUG(0, ("Got invalid msg len %d\n", (int)len));
+       if (data->length != MSG_SMB_KERNEL_BREAK_SIZE) {
+               DEBUG(0, ("Got invalid msg len %d\n", (int)data->length));
                return;
        }
 
-       DEBUG(10, ("Got kernel oplock break message from pid %d: %d/%d/%d\n",
-                  (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode,
-                  (int)msg->file_id));
+       /* Pull the data from the message. */
+       pull_file_id_24((char *)data->data, &id);
+       file_id = (unsigned long)IVAL(data->data, 24);
+
+       DEBUG(10, ("Got kernel oplock break message from pid %s: %s/%u\n",
+                  procid_str(talloc_tos(), &src), file_id_string_tos(&id),
+                  (unsigned int)file_id));
 
-       fsp = initial_break_processing(msg->dev, msg->inode, msg->file_id);
+       fsp = initial_break_processing(id, file_id);
 
        if (fsp == NULL) {
                DEBUG(3, ("Got a kernel oplock break message for a file "
@@ -525,84 +609,111 @@ static void process_kernel_oplock_break(int msg_type, struct process_id src,
                exit_server("Could not talloc break_msg\n");
        }
 
-       /* Save the server smb signing state. */
-       sign_state = srv_oplock_set_signing(False);
-
        show_msg(break_msg);
-       if (!send_smb(smbd_server_fd(), break_msg)) {
-               exit_server("oplock_break: send_smb failed.");
+       if (!srv_send_smb(smbd_server_fd(),
+                       break_msg, false, 0,
+                       IS_CONN_ENCRYPTED(fsp->conn),
+                       NULL)) {
+               exit_server_cleanly("oplock_break: srv_send_smb failed.");
        }
 
-       /* Restore the sign state to what it was. */
-       srv_oplock_set_signing(sign_state);
-
-       talloc_free(break_msg);
+       TALLOC_FREE(break_msg);
 
        fsp->sent_oplock_break = BREAK_TO_NONE_SENT;
+
+       add_oplock_timeout_handler(fsp);
 }
 
 void reply_to_oplock_break_requests(files_struct *fsp)
 {
        int i;
 
+       /*
+        * If kernel oplocks already notifies smbds when oplocks are
+        * broken/removed, just return.
+        */
+       if (koplocks &&
+           (koplocks->flags & KOPLOCKS_OPLOCK_BROKEN_NOTIFICATION)) {
+               return;
+       }
+
        for (i=0; i<fsp->num_pending_break_messages; i++) {
-               struct share_mode_entry *msg = &fsp->pending_break_messages[i];
-               message_send_pid(msg->pid, MSG_SMB_BREAK_RESPONSE,
-                                msg, sizeof(*msg), True);
+               struct share_mode_entry *e = &fsp->pending_break_messages[i];
+               char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
+
+               share_mode_entry_to_message(msg, e);
+
+               messaging_send_buf(smbd_messaging_context(), e->pid,
+                                  MSG_SMB_BREAK_RESPONSE,
+                                  (uint8 *)msg,
+                                  MSG_SMB_SHARE_MODE_ENTRY_SIZE);
        }
 
        SAFE_FREE(fsp->pending_break_messages);
        fsp->num_pending_break_messages = 0;
        if (fsp->oplock_timeout != NULL) {
-               talloc_free(fsp->oplock_timeout);
+               /* Remove the timed event handler. */
+               TALLOC_FREE(fsp->oplock_timeout);
                fsp->oplock_timeout = NULL;
        }
        return;
 }
 
-static void process_oplock_break_response(int msg_type, struct process_id src,
-                                         void *buf, size_t len)
+static void process_oplock_break_response(struct messaging_context *msg_ctx,
+                                         void *private_data,
+                                         uint32_t msg_type,
+                                         struct server_id src,
+                                         DATA_BLOB *data)
 {
-       struct share_mode_entry *msg = buf;
+       struct share_mode_entry msg;
 
-       if (buf == NULL) {
+       if (data->data == NULL) {
                DEBUG(0, ("Got NULL buffer\n"));
                return;
        }
 
-       if (len != sizeof(*msg)) {
-               DEBUG(0, ("Got invalid msg len %d\n", (int)len));
+       if (data->length != MSG_SMB_SHARE_MODE_ENTRY_SIZE) {
+               DEBUG(0, ("Got invalid msg len %u\n",
+                         (unsigned int)data->length));
                return;
        }
 
-       DEBUG(10, ("Got oplock break response from pid %d: %d/%d/%d mid %d\n",
-                  (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode,
-                  (int)msg->share_file_id, (int)msg->op_mid));
+       /* De-linearize incoming message. */
+       message_to_share_mode_entry(&msg, (char *)data->data);
 
-       /* Here's the hack from open.c, store the mid in the 'port' field */
-       schedule_deferred_open_smb_message(msg->op_mid);
+       DEBUG(10, ("Got oplock break response from pid %s: %s/%lu mid %u\n",
+                  procid_str(talloc_tos(), &src), file_id_string_tos(&msg.id),
+                  msg.share_file_id, (unsigned int)msg.op_mid));
+
+       schedule_deferred_open_smb_message(msg.op_mid);
 }
 
-static void process_open_retry_message(int msg_type, struct process_id src,
-                                      void *buf, size_t len)
+static void process_open_retry_message(struct messaging_context *msg_ctx,
+                                      void *private_data,
+                                      uint32_t msg_type,
+                                      struct server_id src,
+                                      DATA_BLOB *data)
 {
-       struct share_mode_entry *msg = buf;
+       struct share_mode_entry msg;
        
-       if (buf == NULL) {
+       if (data->data == NULL) {
                DEBUG(0, ("Got NULL buffer\n"));
                return;
        }
 
-       if (len != sizeof(*msg)) {
-               DEBUG(0, ("Got invalid msg len %d\n", (int)len));
+       if (data->length != MSG_SMB_SHARE_MODE_ENTRY_SIZE) {
+               DEBUG(0, ("Got invalid msg len %d\n", (int)data->length));
                return;
        }
 
-       DEBUG(10, ("Got open retry msg from pid %d: %d/%d mid %d\n",
-                  (int)procid_to_pid(&src), (int)msg->dev, (int)msg->inode,
-                  (int)msg->op_mid));
+       /* De-linearize incoming message. */
+       message_to_share_mode_entry(&msg, (char *)data->data);
+
+       DEBUG(10, ("Got open retry msg from pid %s: %s mid %u\n",
+                  procid_str(talloc_tos(), &src), file_id_string_tos(&msg.id),
+                  (unsigned int)msg.op_mid));
 
-       schedule_deferred_open_smb_message(msg->op_mid);
+       schedule_deferred_open_smb_message(msg.op_mid);
 }
 
 /****************************************************************************
@@ -611,7 +722,8 @@ static void process_open_retry_message(int msg_type, struct process_id src,
  none.
 ****************************************************************************/
 
-void release_level_2_oplocks_on_change(files_struct *fsp)
+static void contend_level2_oplocks_begin_default(files_struct *fsp,
+                                             enum level2_contention_type type)
 {
        int i;
        struct share_mode_lock *lck;
@@ -627,41 +739,30 @@ void release_level_2_oplocks_on_change(files_struct *fsp)
        if (!LEVEL_II_OPLOCK_TYPE(fsp->oplock_type))
                return;
 
-       lck = get_share_mode_lock(NULL, fsp->dev, fsp->inode, NULL);
+       lck = get_share_mode_lock(talloc_tos(), fsp->file_id, NULL, NULL,
+                                 NULL);
        if (lck == NULL) {
                DEBUG(0,("release_level_2_oplocks_on_change: failed to lock "
-                        "share mode entry for file %s.\n", fsp->fsp_name ));
+                        "share mode entry for file %s.\n", fsp_str_dbg(fsp)));
+               return;
        }
 
        DEBUG(10,("release_level_2_oplocks_on_change: num_share_modes = %d\n", 
                  lck->num_share_modes ));
 
-       if (fsp->oplock_type == FAKE_LEVEL_II_OPLOCK) {
-               /* See if someone else has already downgraded us, then we
-                  don't have to do anything */
-               for (i=0; i<lck->num_share_modes; i++) {
-                       struct share_mode_entry *e = &lck->share_modes[i];
-                       if ((e->op_type == NO_OPLOCK) &&
-                           (e->share_file_id == fsp->file_id) &&
-                           (e->dev == fsp->dev) &&
-                           (e->inode == fsp->inode) &&
-                           (procid_is_me(&e->pid))) {
-                               /* We're done */
-                               fsp->oplock_type = NO_OPLOCK;
-                               talloc_free(lck);
-                               return;
-                       }
-               }
-       }
-
        for(i = 0; i < lck->num_share_modes; i++) {
                struct share_mode_entry *share_entry = &lck->share_modes[i];
+               char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
+
+               if (!is_valid_share_mode_entry(share_entry)) {
+                       continue;
+               }
 
                /*
                 * As there could have been multiple writes waiting at the
                 * lock_share_entry gate we may not be the first to
                 * enter. Hence the state of the op_types in the share mode
-                * entries may be partly NO_OPLOCK and partly LEVEL_II
+                * entries may be partly NO_OPLOCK and partly LEVEL_II or FAKE_LEVEL_II
                 * oplock. It will do no harm to re-send break messages to
                 * those smbd's that are still waiting their turn to remove
                 * their LEVEL_II state, and also no harm to ignore existing
@@ -672,8 +773,7 @@ void release_level_2_oplocks_on_change(files_struct *fsp)
                          "share_entry[%i]->op_type == %d\n",
                          i, share_entry->op_type ));
 
-               if ((share_entry->op_type == NO_OPLOCK) ||
-                   (share_entry->op_type == FAKE_LEVEL_II_OPLOCK)) {
+               if (share_entry->op_type == NO_OPLOCK) {
                        continue;
                }
 
@@ -682,42 +782,134 @@ void release_level_2_oplocks_on_change(files_struct *fsp)
                        DEBUG(0,("release_level_2_oplocks_on_change: PANIC. "
                                 "share mode entry %d is an exlusive "
                                 "oplock !\n", i ));
-                       talloc_free(lck);
+                       TALLOC_FREE(lck);
                        abort();
                }
 
-               message_send_pid(share_entry->pid, MSG_SMB_ASYNC_LEVEL2_BREAK,
-                                share_entry, sizeof(*share_entry), True);
+               share_mode_entry_to_message(msg, share_entry);
+
+               /*
+                * Deal with a race condition when breaking level2
+                * oplocks. Don't send all the messages and release
+                * the lock, this allows someone else to come in and
+                * get a level2 lock before any of the messages are
+                * processed, and thus miss getting a break message.
+                * Ensure at least one entry (the one we're breaking)
+                * is processed immediately under the lock and becomes
+                * set as NO_OPLOCK to stop any waiter getting a level2.
+                * Bugid #5980.
+                */
+
+               if (procid_is_me(&share_entry->pid)) {
+                       wait_before_sending_break();
+                       break_level2_to_none_async(fsp);
+               } else {
+                       messaging_send_buf(smbd_messaging_context(),
+                                       share_entry->pid,
+                                       MSG_SMB_ASYNC_LEVEL2_BREAK,
+                                       (uint8 *)msg,
+                                       MSG_SMB_SHARE_MODE_ENTRY_SIZE);
+               }
        }
 
-       remove_all_share_oplocks(lck, fsp);
-       talloc_free(lck);
+       /* We let the message receivers handle removing the oplock state
+          in the share mode lock db. */
+
+       TALLOC_FREE(lck);
+}
+
+void contend_level2_oplocks_begin(files_struct *fsp,
+                                 enum level2_contention_type type)
+{
+       if (koplocks && koplocks->ops->contend_level2_oplocks_begin) {
+               koplocks->ops->contend_level2_oplocks_begin(fsp, type);
+               return;
+       }
+
+       contend_level2_oplocks_begin_default(fsp, type);
+}
+
+void contend_level2_oplocks_end(files_struct *fsp,
+                               enum level2_contention_type type)
+{
+       /* Only kernel oplocks implement this so far */
+       if (koplocks && koplocks->ops->contend_level2_oplocks_end) {
+               koplocks->ops->contend_level2_oplocks_end(fsp, type);
+       }
+}
+
+/****************************************************************************
+ Linearize a share mode entry struct to an internal oplock break message.
+****************************************************************************/
+
+void share_mode_entry_to_message(char *msg, const struct share_mode_entry *e)
+{
+       SIVAL(msg,0,(uint32)e->pid.pid);
+       SSVAL(msg,4,e->op_mid);
+       SSVAL(msg,6,e->op_type);
+       SIVAL(msg,8,e->access_mask);
+       SIVAL(msg,12,e->share_access);
+       SIVAL(msg,16,e->private_options);
+       SIVAL(msg,20,(uint32)e->time.tv_sec);
+       SIVAL(msg,24,(uint32)e->time.tv_usec);
+       push_file_id_24(msg+28, &e->id);
+       SIVAL(msg,52,e->share_file_id);
+       SIVAL(msg,56,e->uid);
+       SSVAL(msg,60,e->flags);
+#ifdef CLUSTER_SUPPORT
+       SIVAL(msg,62,e->pid.vnn);
+#endif
+}
+
+/****************************************************************************
+ De-linearize an internal oplock break message to a share mode entry struct.
+****************************************************************************/
+
+void message_to_share_mode_entry(struct share_mode_entry *e, char *msg)
+{
+       e->pid.pid = (pid_t)IVAL(msg,0);
+       e->op_mid = SVAL(msg,4);
+       e->op_type = SVAL(msg,6);
+       e->access_mask = IVAL(msg,8);
+       e->share_access = IVAL(msg,12);
+       e->private_options = IVAL(msg,16);
+       e->time.tv_sec = (time_t)IVAL(msg,20);
+       e->time.tv_usec = (int)IVAL(msg,24);
+       pull_file_id_24(msg+28, &e->id);
+       e->share_file_id = (unsigned long)IVAL(msg,52);
+       e->uid = (uint32)IVAL(msg,56);
+       e->flags = (uint16)SVAL(msg,60);
+#ifdef CLUSTER_SUPPORT
+       e->pid.vnn = IVAL(msg,62);
+#endif
 }
 
 /****************************************************************************
  Setup oplocks for this process.
 ****************************************************************************/
 
-BOOL init_oplocks(void)
+bool init_oplocks(struct messaging_context *msg_ctx)
 {
-       DEBUG(3,("open_oplock_ipc: opening loopback UDP socket.\n"));
-
-       message_register(MSG_SMB_BREAK_REQUEST,
-                        process_oplock_break_message);
-       message_register(MSG_SMB_ASYNC_LEVEL2_BREAK,
-                        process_oplock_break_message);
-       message_register(MSG_SMB_BREAK_RESPONSE,
-                        process_oplock_break_response);
-       message_register(MSG_SMB_KERNEL_BREAK,
-                        process_kernel_oplock_break);
-       message_register(MSG_SMB_OPEN_RETRY,
-                        process_open_retry_message);
+       DEBUG(3,("init_oplocks: initializing messages.\n"));
+
+       messaging_register(msg_ctx, NULL, MSG_SMB_BREAK_REQUEST,
+                          process_oplock_break_message);
+       messaging_register(msg_ctx, NULL, MSG_SMB_ASYNC_LEVEL2_BREAK,
+                          process_oplock_async_level2_break_message);
+       messaging_register(msg_ctx, NULL, MSG_SMB_BREAK_RESPONSE,
+                          process_oplock_break_response);
+       messaging_register(msg_ctx, NULL, MSG_SMB_KERNEL_BREAK,
+                          process_kernel_oplock_break);
+       messaging_register(msg_ctx, NULL, MSG_SMB_OPEN_RETRY,
+                          process_open_retry_message);
 
        if (lp_kernel_oplocks()) {
 #if HAVE_KERNEL_OPLOCKS_IRIX
-               koplocks = irix_init_kernel_oplocks();
+               koplocks = irix_init_kernel_oplocks(talloc_autofree_context());
 #elif HAVE_KERNEL_OPLOCKS_LINUX
-               koplocks = linux_init_kernel_oplocks();
+               koplocks = linux_init_kernel_oplocks(talloc_autofree_context());
+#elif HAVE_ONEFS
+               koplocks = onefs_init_kernel_oplocks(talloc_autofree_context());
 #endif
        }