smbd: some write time fixes
[tprouty/samba.git] / source / smbd / fileio.c
index bb7ab46..0958418 100644 (file)
@@ -1,13 +1,13 @@
-#define OLD_NTDOMAIN 1
 /* 
    Unix SMB/Netbios implementation.
    Version 1.9.
    read/write to a files_struct
    Copyright (C) Andrew Tridgell 1992-1998
+   Copyright (C) Jeremy Allison 2000-2002. - write cache.
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include "includes.h"
 
-extern int DEBUGLEVEL;
-
-static BOOL setup_write_cache(files_struct *, SMB_OFF_T);
+static bool setup_write_cache(files_struct *, SMB_OFF_T);
 
 /****************************************************************************
-seek a file. Try to avoid the seek if possible
+ Read from write cache if we can.
 ****************************************************************************/
 
-SMB_OFF_T seek_file(files_struct *fsp,SMB_OFF_T pos)
+static bool read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
 {
-  SMB_OFF_T offset = 0;
-  SMB_OFF_T seek_ret;
-
-  if (fsp->print_file && lp_postscript(fsp->conn->service))
-    offset = 3;
-
-  seek_ret = fsp->conn->vfs_ops.lseek(fsp,fsp->fd,pos+offset,SEEK_SET);
+       write_cache *wcp = fsp->wcp;
 
-  /*
-   * We want to maintain the fiction that we can seek
-   * on a fifo for file system purposes. This allows 
-   * people to set up UNIX fifo's that feed data to Windows
-   * applications. JRA.
-   */
+       if(!wcp) {
+               return False;
+       }
 
-  if((seek_ret == -1) && (errno == ESPIPE)) {
-    seek_ret = pos+offset;
-    errno = 0;
-  }
+       if( n > wcp->data_size || pos < wcp->offset || pos + n > wcp->offset + wcp->data_size) {
+               return False;
+       }
 
-  if((seek_ret == -1) || (seek_ret != pos+offset)) {
-    DEBUG(0,("seek_file: sys_lseek failed. Error was %s\n", strerror(errno) ));
-    fsp->pos = -1;
-    return -1;
-  }
+       memcpy(data, wcp->data + (pos - wcp->offset), n);
 
-  fsp->pos = seek_ret - offset;
+       DO_PROFILE_INC(writecache_read_hits);
 
-  DEBUG(10,("seek_file: requested pos = %.0f, new pos = %.0f\n",
-        (double)(pos+offset), (double)fsp->pos ));
-
-  return(fsp->pos);
+       return True;
 }
 
 /****************************************************************************
- Read from write cache if we can.
+ Read from a file.
 ****************************************************************************/
 
-
-BOOL read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
+ssize_t read_file(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
 {
-  write_cache *wcp = fsp->wcp;
+       ssize_t ret=0,readret;
+
+       /* you can't read from print files */
+       if (fsp->print_file) {
+               return -1;
+       }
+
+       /*
+        * Serve from write cache if we can.
+        */
+
+       if(read_from_write_cache(fsp, data, pos, n)) {
+               fsp->fh->pos = pos + n;
+               fsp->fh->position_information = fsp->fh->pos;
+               return n;
+       }
+
+       flush_write_cache(fsp, READ_FLUSH);
+
+       fsp->fh->pos = pos;
+
+       if (n > 0) {
+#ifdef DMF_FIX
+               int numretries = 3;
+tryagain:
+               readret = SMB_VFS_PREAD(fsp,data,n,pos);
+
+               if (readret == -1) {
+                       if ((errno == EAGAIN) && numretries) {
+                               DEBUG(3,("read_file EAGAIN retry in 10 seconds\n"));
+                               (void)sleep(10);
+                               --numretries;
+                               goto tryagain;
+                       }
+                       return -1;
+               }
+#else /* NO DMF fix. */
+               readret = SMB_VFS_PREAD(fsp,data,n,pos);
+
+               if (readret == -1) {
+                       return -1;
+               }
+#endif
+               if (readret > 0) {
+                       ret += readret;
+               }
+       }
 
-  if(!wcp)
-    return False;
+       DEBUG(10,("read_file (%s): pos = %.0f, size = %lu, returned %lu\n",
+               fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
 
-  if(n > wcp->data_size || pos < wcp->offset || pos + n > wcp->offset + wcp->data_size)
-    return False;
+       fsp->fh->pos += ret;
+       fsp->fh->position_information = fsp->fh->pos;
 
-  memcpy(data, wcp->data + (pos - wcp->offset), n);
+       return(ret);
+}
 
-  DO_PROFILE_INC(writecache_read_hits);
+/* how many write cache buffers have been allocated */
+static unsigned int allocated_write_caches;
 
-  return True;
+/****************************************************************************
+ *Really* write to a file.
+****************************************************************************/
+
+static ssize_t real_write_file(struct smb_request *req,
+                               files_struct *fsp,
+                               const char *data,
+                               SMB_OFF_T pos,
+                               size_t n)
+{
+       ssize_t ret;
+
+        if (pos == -1) {
+                ret = vfs_write_data(req, fsp, data, n);
+        } else {
+               fsp->fh->pos = pos;
+               if (pos && lp_strict_allocate(SNUM(fsp->conn))) {
+                       if (vfs_fill_sparse(fsp, pos) == -1) {
+                               return -1;
+                       }
+               }
+                ret = vfs_pwrite_data(req, fsp, data, n, pos);
+       }
+
+       DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
+               fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
+
+       if (ret != -1) {
+               fsp->fh->pos += ret;
+
+/* Yes - this is correct - writes don't update position_information. JRA. */
+/* Found by Samba4 tests. */
+#if 0
+               fsp->position_information = fsp->pos;
+#endif
+       }
+
+       return ret;
 }
 
 /****************************************************************************
-read from a file
+ File size cache change.
+ Updates size on disk but doesn't flush the cache.
 ****************************************************************************/
 
-ssize_t read_file(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
+static int wcp_file_size_change(files_struct *fsp)
 {
-  ssize_t ret=0,readret;
-
-  /* you can't read from print files */
-  if (fsp->print_file) {
-         return -1;
-  }
+       int ret;
+       write_cache *wcp = fsp->wcp;
+
+       wcp->file_size = wcp->offset + wcp->data_size;
+       ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
+       if (ret == -1) {
+               DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f error %s\n",
+                       fsp->fsp_name, (double)wcp->file_size, strerror(errno) ));
+       }
+       return ret;
+}
 
-  /*
-   * Serve from write cache if we can.
-   */
-  if(read_from_write_cache(fsp, data, pos, n))
-    return n;
+static void update_write_time_handler(struct event_context *ctx,
+                                     struct timed_event *te,
+                                     const struct timeval *now,
+                                     void *private_data)
+{
+       files_struct *fsp = (files_struct *)private_data;
 
-  flush_write_cache(fsp, READ_FLUSH);
+       /* Remove the timed event handler. */
+       TALLOC_FREE(fsp->update_write_time_event);
+       DEBUG(5, ("Update write time on %s\n", fsp->fsp_name));
 
-  if (seek_file(fsp,pos) == -1) {
-    DEBUG(3,("read_file: Failed to seek to %.0f\n",(double)pos));
-    return(ret);
-  }
-  
-  if (n > 0) {
-    readret = fsp->conn->vfs_ops.read(fsp,fsp->fd,data,n);
-    if (readret == -1)
-      return -1;
-    if (readret > 0) ret += readret;
-  }
-
-  return(ret);
+       /* change the write time if not already changed by someone else */
+       update_write_time(fsp);
 }
 
-/* how many write cache buffers have been allocated */
-static unsigned int allocated_write_caches;
+/*********************************************************
+ Schedule a write time update. The delay defaults to
+ WRITE_TIME_UPDATE_USEC_DELAY and can be overridden with
+ the "smbd:writetimeupdatedelay" parametric option.
+*********************************************************/
 
-/****************************************************************************
- *Really* write to a file.
-****************************************************************************/
+void trigger_write_time_update(struct files_struct *fsp)
+{
+       int delay;
+
+       if (fsp->write_time_forced) {
+               /* No point - "sticky" write times
+                * in effect.
+                */
+               return;
+       }
+
+       if (fsp->update_write_time_triggered) {
+               /*
+                * No point - an event is already scheduled.
+                */
+               return;
+       }
+       fsp->update_write_time_triggered = true;
+
+       delay = lp_parm_int(SNUM(fsp->conn),
+                           "smbd", "writetimeupdatedelay",
+                           WRITE_TIME_UPDATE_USEC_DELAY);
+
+       /* trigger the update after the configured delay (in microseconds) */
+       fsp->update_write_time_on_close = true;
+       fsp->update_write_time_event =
+               event_add_timed(smbd_event_context(), NULL,
+                               timeval_current_ofs(0, delay),
+                               "update_write_time_handler",
+                               update_write_time_handler, fsp);
+}
 
-static ssize_t real_write_file(files_struct *fsp,char *data,SMB_OFF_T pos, size_t n)
+void trigger_write_time_update_immediate(struct files_struct *fsp)
 {
-  if ((pos != -1) && (seek_file(fsp,pos) == -1))
-    return -1;
+        if (fsp->write_time_forced) {
+               /*
+                * No point - "sticky" write times
+                * in effect.
+                */
+                return;
+        }
+
+       TALLOC_FREE(fsp->update_write_time_event);
+       DEBUG(5, ("Update write time immediate on %s\n", fsp->fsp_name));
+
+       fsp->update_write_time_triggered = true;
 
-  return vfs_write_data(fsp,data,n);
+        fsp->update_write_time_on_close = false;
+       update_write_time(fsp);
 }
 
 /****************************************************************************
-write to a file
+ Write to a file.
 ****************************************************************************/
 
-ssize_t write_file(files_struct *fsp, char *data, SMB_OFF_T pos, size_t n)
+ssize_t write_file(struct smb_request *req,
+                       files_struct *fsp,
+                       const char *data,
+                       SMB_OFF_T pos,
+                       size_t n)
 {
-  write_cache *wcp = fsp->wcp;
-  ssize_t total_written = 0;
-  int write_path = -1; 
-
-  if (fsp->print_file) {
-         return print_job_write(fsp->print_jobid, data, n);
-  }
-
-  if (!fsp->can_write) {
-    errno = EPERM;
-    return(0);
-  }
-
-  if (!fsp->modified) {
-    SMB_STRUCT_STAT st;
-    fsp->modified = True;
-
-    if (fsp->conn->vfs_ops.fstat(fsp,fsp->fd,&st) == 0) {
-      int dosmode = dos_mode(fsp->conn,fsp->fsp_name,&st);
-      if (MAP_ARCHIVE(fsp->conn) && !IS_DOS_ARCHIVE(dosmode)) {        
-        file_chmod(fsp->conn,fsp->fsp_name,dosmode | aARCH,&st);
-      }
-
-      /*
-       * If this is the first write and we have an exclusive oplock then setup
-       * the write cache.
-       */
-
-      if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && !wcp) {
-        setup_write_cache(fsp, st.st_size);
-        wcp = fsp->wcp;
-      } 
-    }  
-  }
+       write_cache *wcp = fsp->wcp;
+       ssize_t total_written = 0;
+       int write_path = -1;
+
+       if (fsp->print_file) {
+               fstring sharename;
+               uint32 jobid;
+
+               if (!rap_to_pjobid(fsp->rap_print_jobid, sharename, &jobid)) {
+                       DEBUG(3,("write_file: Unable to map RAP jobid %u to jobid.\n",
+                                               (unsigned int)fsp->rap_print_jobid ));
+                       errno = EBADF;
+                       return -1;
+               }
+
+               return print_job_write(SNUM(fsp->conn), jobid, data, pos, n);
+       }
+
+       if (!fsp->can_write) {
+               errno = EPERM;
+               return -1;
+       }
+
+       if (!fsp->modified) {
+               SMB_STRUCT_STAT st;
+               fsp->modified = True;
+
+               if (SMB_VFS_FSTAT(fsp, &st) == 0) {
+                       int dosmode;
+                       trigger_write_time_update(fsp);
+                       dosmode = dos_mode(fsp->conn,fsp->fsp_name,&st);
+                       if ((lp_store_dos_attributes(SNUM(fsp->conn)) ||
+                                       MAP_ARCHIVE(fsp->conn)) &&
+                                       !IS_DOS_ARCHIVE(dosmode)) {
+                               file_set_dosmode(fsp->conn,fsp->fsp_name,
+                                               dosmode | aARCH,&st,
+                                               NULL,
+                                               false);
+                       }
+
+                       /*
+                        * If this is the first write and we have an exclusive oplock then set up
+                        * the write cache.
+                        */
+
+                       if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && !wcp) {
+                               setup_write_cache(fsp, st.st_size);
+                               wcp = fsp->wcp;
+                       }
+               }
+       }
 
 #ifdef WITH_PROFILE
-  DO_PROFILE_INC(writecache_total_writes);
-  if (!fsp->oplock_type) {
-    DO_PROFILE_INC(writecache_non_oplock_writes);
-  }
+       DO_PROFILE_INC(writecache_total_writes);
+       if (!fsp->oplock_type) {
+               DO_PROFILE_INC(writecache_non_oplock_writes);
+       }
 #endif
 
-  /*
-   * If this file is level II oplocked then we need
-   * to grab the shared memory lock and inform all
-   * other files with a level II lock that they need
-   * to flush their read caches. We keep the lock over
-   * the shared memory area whilst doing this.
-   */
-
-  if (LEVEL_II_OPLOCK_TYPE(fsp->oplock_type)) {
-    share_mode_entry *share_list = NULL;
-    pid_t pid = sys_getpid();
-    int token = -1;
-    int num_share_modes = 0;
-    int i;
-
-    if (lock_share_entry_fsp(fsp) == False) {
-      DEBUG(0,("write_file: failed to lock share mode entry for file %s.\n", fsp->fsp_name ));
-    }
-
-    num_share_modes = get_share_modes(fsp->conn, fsp->dev, fsp->inode, &share_list);
-
-    for(i = 0; i < num_share_modes; i++) {
-      share_mode_entry *share_entry = &share_list[i];
-
-      /*
-       * As there could have been multiple writes waiting at the lock_share_entry
-       * gate we may not be the first to enter. Hence the state of the op_types
-       * in the share mode entries may be partly NO_OPLOCK and partly LEVEL_II
-       * oplock. It will do no harm to re-send break messages to those smbd's
-       * that are still waiting their turn to remove their LEVEL_II state, and
-       * also no harm to ignore existing NO_OPLOCK states. JRA.
-       */
-
-      if (share_entry->op_type == NO_OPLOCK)
-        continue;
-
-      /* Paranoia .... */
-      if (EXCLUSIVE_OPLOCK_TYPE(share_entry->op_type)) {
-        DEBUG(0,("write_file: PANIC. share mode entry %d is an exlusive oplock !\n", i ));
-        unlock_share_entry(fsp->conn, fsp->dev, fsp->inode);
-        abort();
-      }
-
-      /*
-       * Check if this is a file we have open (including the
-       * file we've been called to do write_file on. If so
-       * then break it directly without releasing the lock.
-       */
-
-      if (pid == share_entry->pid) {
-        files_struct *new_fsp = file_find_dit(fsp->dev, fsp->inode, &share_entry->time);
-
-        /* Paranoia check... */
-        if(new_fsp == NULL) {
-          DEBUG(0,("write_file: PANIC. share mode entry %d is not a local file !\n", i ));
-          unlock_share_entry(fsp->conn, fsp->dev, fsp->inode);
-          abort();
-        }
-        oplock_break_level2(new_fsp, True, token);
-
-      } else {
-
-        /*
-         * This is a remote file and so we send an asynchronous
-         * message.
-         */
+       /*
+        * If this file is level II oplocked then we need
+        * to grab the shared memory lock and inform all
+        * other files with a level II lock that they need
+        * to flush their read caches. We keep the lock over
+        * the shared memory area whilst doing this.
+        */
 
-        request_oplock_break(share_entry, fsp->dev, fsp->inode);
-      }
-    }
-    free((char *)share_list);
-    unlock_share_entry_fsp(fsp);
-  }
-
-  /* Paranoia check... */
-  if (LEVEL_II_OPLOCK_TYPE(fsp->oplock_type)) {
-    DEBUG(0,("write_file: PANIC. File %s still has a level II oplock.\n", fsp->fsp_name));
-    abort();
-  }
+       release_level_2_oplocks_on_change(fsp);
 
 #ifdef WITH_PROFILE
-  if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
-    DEBUG(3,("WRITECACHE: initwrites=%u abutted=%u total=%u \
+       if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
+               DEBUG(3,("WRITECACHE: initwrites=%u abutted=%u total=%u \
 nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
-       profile_p->writecache_init_writes,
-       profile_p->writecache_abutted_writes,
-       profile_p->writecache_total_writes,
-       profile_p->writecache_non_oplock_writes,
-       profile_p->writecache_allocated_write_caches,
-       profile_p->writecache_num_write_caches,
-       profile_p->writecache_direct_writes,
-       profile_p->writecache_num_perfect_writes,
-       profile_p->writecache_read_hits ));
-
-    DEBUG(3,("WRITECACHE: Flushes SEEK=%d, READ=%d, WRITE=%d, READRAW=%d, OPLOCK=%d, CLOSE=%d, SYNC=%d\n",
-       profile_p->writecache_flushed_writes[SEEK_FLUSH],
-       profile_p->writecache_flushed_writes[READ_FLUSH],
-       profile_p->writecache_flushed_writes[WRITE_FLUSH],
-       profile_p->writecache_flushed_writes[READRAW_FLUSH],
-       profile_p->writecache_flushed_writes[OPLOCK_RELEASE_FLUSH],
-       profile_p->writecache_flushed_writes[CLOSE_FLUSH],
-       profile_p->writecache_flushed_writes[SYNC_FLUSH] ));
-  }
+                       profile_p->writecache_init_writes,
+                       profile_p->writecache_abutted_writes,
+                       profile_p->writecache_total_writes,
+                       profile_p->writecache_non_oplock_writes,
+                       profile_p->writecache_allocated_write_caches,
+                       profile_p->writecache_num_write_caches,
+                       profile_p->writecache_direct_writes,
+                       profile_p->writecache_num_perfect_writes,
+                       profile_p->writecache_read_hits ));
+
+               DEBUG(3,("WRITECACHE: Flushes SEEK=%d, READ=%d, WRITE=%d, READRAW=%d, OPLOCK=%d, CLOSE=%d, SYNC=%d\n",
+                       profile_p->writecache_flushed_writes[SEEK_FLUSH],
+                       profile_p->writecache_flushed_writes[READ_FLUSH],
+                       profile_p->writecache_flushed_writes[WRITE_FLUSH],
+                       profile_p->writecache_flushed_writes[READRAW_FLUSH],
+                       profile_p->writecache_flushed_writes[OPLOCK_RELEASE_FLUSH],
+                       profile_p->writecache_flushed_writes[CLOSE_FLUSH],
+                       profile_p->writecache_flushed_writes[SYNC_FLUSH] ));
+       }
 #endif
 
-  if(!wcp) {
-    DO_PROFILE_INC(writecache_direct_writes);
-    return real_write_file(fsp, data, pos, n);
-  }
+       if (wcp && req->unread_bytes) {
+               /* If we're using receivefile don't
+                * deal with a write cache.
+                */
+               flush_write_cache(fsp, WRITE_FLUSH);
+               delete_write_cache(fsp);
+               wcp = NULL;
+       }
+
+       if(!wcp) {
+               DO_PROFILE_INC(writecache_direct_writes);
+               total_written = real_write_file(req, fsp, data, pos, n);
+               return total_written;
+       }
 
-  DEBUG(9,("write_file(fd=%d pos=%d size=%d) wofs=%d wsize=%d\n",
-          fsp->fd, (int)pos, (int)n, (int)wcp->offset, (int)wcp->data_size));
+       DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f wcp->data_size=%u\n",
+               fsp->fsp_name, fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size));
 
-  /* 
-   * If we have active cache and it isn't contiguous then we flush.
-   * NOTE: There is a small problem with running out of disk ....
-   */
+       fsp->fh->pos = pos + n;
 
-  if (wcp->data_size) {
+       /*
+        * If we have active cache and it isn't contiguous then we flush.
+        * NOTE: There is a small problem with running out of disk ....
+        */
 
-    BOOL cache_flush_needed = False;
+       if (wcp->data_size) {
+               bool cache_flush_needed = False;
 
-    if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
+               if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
       
-      /*
-       * Start of write overlaps or abutts the existing data.
-       */
+                       /* ASCII art.... JRA.
 
-      size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
+      +--------------+-----
+      | Cached data  | Rest of allocated cache buffer....
+      +--------------+-----
 
-      memcpy(wcp->data + (pos - wcp->offset), data, data_used);
+            +-------------------+
+            | Data to write     |
+            +-------------------+
 
-      /*
-       * Update the current buffer size with the new data.
-       */
+                       */
 
-      if(pos + data_used > wcp->offset + wcp->data_size)
-        wcp->data_size = pos + data_used - wcp->offset;
+                       /*
+                        * Start of write overlaps or abuts the existing data.
+                        */
 
-      /*
-       * If we used all the data then
-       * return here.
-       */
+                       size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
 
-      if(n == data_used)
-        return n;
-      else
-        cache_flush_needed = True;
+                       memcpy(wcp->data + (pos - wcp->offset), data, data_used);
 
-      /*
-       * Move the start of data forward by the amount used,
-       * cut down the amount left by the same amount.
-       */
+                       /*
+                        * Update the current buffer size with the new data.
+                        */
 
-      data += data_used;
-      pos += data_used;
-      n -= data_used;
+                       if(pos + data_used > wcp->offset + wcp->data_size) {
+                               wcp->data_size = pos + data_used - wcp->offset;
+                       }
 
-      DO_PROFILE_INC(writecache_abutted_writes);
-      total_written = data_used;
+                       /*
+                        * Update the file size if changed.
+                        */
 
-      write_path = 1;
+                       if (wcp->offset + wcp->data_size > wcp->file_size) {
+                               if (wcp_file_size_change(fsp) == -1) {
+                                       return -1;
+                               }
+                       }
 
-    } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
-               (pos + n <= wcp->offset + wcp->alloc_size)) {
+                       /*
+                        * If we used all the data then
+                        * return here.
+                        */
 
-      /*
-       * End of write overlaps the existing data.
-       */
+                       if(n == data_used) {
+                               return n;
+                       } else {
+                               cache_flush_needed = True;
+                       }
+                       /*
+                        * Move the start of data forward by the amount used,
+                        * cut down the amount left by the same amount.
+                        */
 
-      size_t data_used = pos + n - wcp->offset;
+                       data += data_used;
+                       pos += data_used;
+                       n -= data_used;
 
-      memcpy(wcp->data, data + n - data_used, data_used);
+                       DO_PROFILE_INC(writecache_abutted_writes);
+                       total_written = data_used;
 
-      /*
-       * Update the current buffer size with the new data.
-       */
+                       write_path = 1;
 
-      if(pos + n > wcp->offset + wcp->data_size)
-        wcp->data_size = pos + n - wcp->offset;
+               } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
+                                       (pos + n <= wcp->offset + wcp->alloc_size)) {
 
-      /*
-       * We don't need to move the start of data, but we
-       * cut down the amount left by the amount used.
-       */
+                       /* ASCII art.... JRA.
 
-      n -= data_used;
+                        +---------------+
+                        | Cache buffer  |
+                        +---------------+
 
-      /*
-       * We cannot have used all the data here.
-       */
+            +-------------------+
+            | Data to write     |
+            +-------------------+
 
-      cache_flush_needed = True;
+                       */
 
-      DO_PROFILE_INC(writecache_abutted_writes);
-      total_written = data_used;
+                       /*
+                        * End of write overlaps the existing data.
+                        */
 
-      write_path = 2;
+                       size_t data_used = pos + n - wcp->offset;
 
-    } else if ( (pos >= wcp->file_size) && 
-                (pos > wcp->offset + wcp->data_size) && 
-                (pos < wcp->offset + wcp->alloc_size) ) {
+                       memcpy(wcp->data, data + n - data_used, data_used);
 
-      /*
-       * Non-contiguous write part of which fits within
-       * the cache buffer and is extending the file.
-       */
+                       /*
+                        * Update the current buffer size with the new data.
+                        */
 
-      size_t data_used;
+                       if(pos + n > wcp->offset + wcp->data_size) {
+                               wcp->data_size = pos + n - wcp->offset;
+                       }
 
-      if(pos + n <= wcp->offset + wcp->alloc_size)
-        data_used = n;
-      else
-        data_used = wcp->offset + wcp->alloc_size - pos;
+                       /*
+                        * Update the file size if changed.
+                        */
 
-      /*
-       * Fill in the non-continuous area with zeros.
-       */
+                       if (wcp->offset + wcp->data_size > wcp->file_size) {
+                               if (wcp_file_size_change(fsp) == -1) {
+                                       return -1;
+                               }
+                       }
 
-      memset(wcp->data + wcp->data_size, '\0',
-             pos - (wcp->offset + wcp->data_size) );
+                       /*
+                        * We don't need to move the start of data, but we
+                        * cut down the amount left by the amount used.
+                        */
 
-      memcpy(wcp->data + (pos - wcp->offset), data, data_used);
+                       n -= data_used;
 
-      /*
-       * Update the current buffer size with the new data.
-       */
+                       /*
+                        * We cannot have used all the data here.
+                        */
 
-      if(pos + data_used > wcp->offset + wcp->data_size)
-        wcp->data_size = pos + data_used - wcp->offset;
+                       cache_flush_needed = True;
 
-      /*
-       * Update the known file length.
-       */
+                       DO_PROFILE_INC(writecache_abutted_writes);
+                       total_written = data_used;
 
-      wcp->file_size = wcp->offset + wcp->data_size;
+                       write_path = 2;
 
-#if 0
-      if (set_filelen(fsp->fd, wcp->file_size) == -1) {
-        DEBUG(0,("write_file: error %s in setting file to length %.0f\n",
-          strerror(errno), (double)wcp->file_size ));
-        return -1;
-      }
-#endif
+               } else if ( (pos >= wcp->file_size) && 
+                                       (wcp->offset + wcp->data_size == wcp->file_size) &&
+                                       (pos > wcp->offset + wcp->data_size) && 
+                                       (pos < wcp->offset + wcp->alloc_size) ) {
+
+                       /* ASCII art.... JRA.
+
+                       End of file ---->|
+
+                        +---------------+---------------+
+                        | Cached data   | Cache buffer  |
+                        +---------------+---------------+
+
+                                              +-------------------+
+                                              | Data to write     |
+                                              +-------------------+
+
+                       */
+
+                       /*
+                        * Non-contiguous write part of which fits within
+                        * the cache buffer and is extending the file
+                        * and the cache contents reflect the current
+                        * data up to the current end of the file.
+                        */
+
+                       size_t data_used;
+
+                       if(pos + n <= wcp->offset + wcp->alloc_size) {
+                               data_used = n;
+                       } else {
+                               data_used = wcp->offset + wcp->alloc_size - pos;
+                       }
 
-      /*
-       * If we used all the data then
-       * return here.
-       */
+                       /*
+                        * Fill in the non-contiguous area with zeros.
+                        */
 
-      if(n == data_used)
-        return n;
-      else
-        cache_flush_needed = True;
+                       memset(wcp->data + wcp->data_size, '\0',
+                               pos - (wcp->offset + wcp->data_size) );
 
-      /*
-       * Move the start of data forward by the amount used,
-       * cut down the amount left by the same amount.
-       */
+                       memcpy(wcp->data + (pos - wcp->offset), data, data_used);
 
-      data += data_used;
-      pos += data_used;
-      n -= data_used;
+                       /*
+                        * Update the current buffer size with the new data.
+                        */
 
-      DO_PROFILE_INC(writecache_abutted_writes);
-      total_written = data_used;
+                       if(pos + data_used > wcp->offset + wcp->data_size) {
+                               wcp->data_size = pos + data_used - wcp->offset;
+                       }
 
-      write_path = 3;
+                       /*
+                        * Update the file size if changed.
+                        */
 
-    } else {
+                       if (wcp->offset + wcp->data_size > wcp->file_size) {
+                               if (wcp_file_size_change(fsp) == -1) {
+                                       return -1;
+                               }
+                       }
 
-      /*
-       * Write is bigger than buffer, or there is no overlap on the
-       * low or high ends.
-       */
+                       /*
+                        * If we used all the data then
+                        * return here.
+                        */
 
-      DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
-len = %u\n",fsp->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
+                       if(n == data_used) {
+                               return n;
+                       } else {
+                               cache_flush_needed = True;
+                       }
 
-      /*
-       * Update the file size if needed.
-       */
+                       /*
+                        * Move the start of data forward by the amount used,
+                        * cut down the amount left by the same amount.
+                        */
 
-      if(pos + n > wcp->file_size)
-        wcp->file_size = pos + n;
+                       data += data_used;
+                       pos += data_used;
+                       n -= data_used;
 
-      /*
-       * If write would fit in the cache, and is larger than
-       * the data already in the cache, flush the cache and
-       * preferentially copy the data new data into it. Otherwise
-       * just write the data directly.
-       */
+                       DO_PROFILE_INC(writecache_abutted_writes);
+                       total_written = data_used;
 
-      if ( n <= wcp->alloc_size && n > wcp->data_size) {
-        cache_flush_needed = True;
-      } else {
-       DO_PROFILE_INC(writecache_direct_writes);
-        return real_write_file(fsp, data, pos, n);
-      }
+                       write_path = 3;
 
-      write_path = 4;
+                } else if ( (pos >= wcp->file_size) &&
+                           (n == 1) &&
+                           (wcp->file_size == wcp->offset + wcp->data_size) &&
+                           (pos < wcp->file_size + wcp->alloc_size)) {
 
-    }
+                        /*
 
-    if(wcp->data_size > wcp->file_size)
-      wcp->file_size = wcp->data_size;
+                End of file ---->|
 
-    if (cache_flush_needed) {
-      DEBUG(3,("WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
+                 +---------------+---------------+
+                 | Cached data   | Cache buffer  |
+                 +---------------+---------------+
+
+                                 |<------- allocated size ---------------->|
+
+                                                         +--------+
+                                                         | 1 Byte |
+                                                         +--------+
+
+                       MS-Office seems to do this a lot to determine if there's enough
+                       space on the filesystem to write a new file.
+
+                       Change to :
+
+                End of file ---->|
+                                 +-----------------------+--------+
+                                 | Zeroed Cached data    | 1 Byte |
+                                 +-----------------------+--------+
+                        */
+
+                       flush_write_cache(fsp, WRITE_FLUSH);
+                       wcp->offset = wcp->file_size;
+                       wcp->data_size = pos - wcp->file_size + 1;
+                       memset(wcp->data, '\0', wcp->data_size);
+                       memcpy(wcp->data + wcp->data_size-1, data, 1);
+
+                       /*
+                        * Update the file size if changed.
+                        */
+
+                       if (wcp->offset + wcp->data_size > wcp->file_size) {
+                               if (wcp_file_size_change(fsp) == -1) {
+                                       return -1;
+                               }
+                       }
+
+                       return n;
+
+               } else {
+
+                       /* ASCII art..... JRA.
+
+   Case 1).
+
+                        +---------------+---------------+
+                        | Cached data   | Cache buffer  |
+                        +---------------+---------------+
+
+                                                              +-------------------+
+                                                              | Data to write     |
+                                                              +-------------------+
+
+   Case 2).
+
+                           +---------------+---------------+
+                           | Cached data   | Cache buffer  |
+                           +---------------+---------------+
+
+   +-------------------+
+   | Data to write     |
+   +-------------------+
+
+    Case 3).
+
+                           +---------------+---------------+
+                           | Cached data   | Cache buffer  |
+                           +---------------+---------------+
+
+                  +-----------------------------------------------------+
+                  | Data to write                                       |
+                  +-----------------------------------------------------+
+
+                 */
+
+                       /*
+                        * Write is bigger than buffer, or there is no overlap on the
+                        * low or high ends.
+                        */
+
+                       DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
+len = %u\n",fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
+
+                       /*
+                        * If write would fit in the cache, and is larger than
+                        * the data already in the cache, flush the cache and
+                        * preferentially copy the new data into it. Otherwise
+                        * just write the data directly.
+                        */
+
+                       if ( n <= wcp->alloc_size && n > wcp->data_size) {
+                               cache_flush_needed = True;
+                       } else {
+                               ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
+
+                               /*
+                                * If the write overlaps the entire cache, then
+                                * discard the current contents of the cache.
+                                * Fix from Rasmus Borup Hansen rbh@math.ku.dk.
+                                */
+
+                               if ((pos <= wcp->offset) &&
+                                               (pos + n >= wcp->offset + wcp->data_size) ) {
+                                       DEBUG(9,("write_file: discarding overwritten write \
+cache: fd = %d, off=%.0f, size=%u\n", fsp->fh->fd, (double)wcp->offset, (unsigned int)wcp->data_size ));
+                                       wcp->data_size = 0;
+                               }
+
+                               DO_PROFILE_INC(writecache_direct_writes);
+                               if (ret == -1) {
+                                       return ret;
+                               }
+
+                               if (pos + ret > wcp->file_size) {
+                                       wcp->file_size = pos + ret;
+                               }
+
+                               return ret;
+                       }
+
+                       write_path = 4;
+
+               }
+
+               if (cache_flush_needed) {
+                       DEBUG(3,("WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
 n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
-             write_path, fsp->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
-             (double)wcp->offset, (unsigned int)wcp->data_size ));
-
-      flush_write_cache(fsp, WRITE_FLUSH);
-    }
-  }
-
-  /*
-   * If the write request is bigger than the cache
-   * size, write it all out.
-   */
-
-  if (n > wcp->alloc_size ) {
-    if(real_write_file(fsp, data, pos, n) == -1)
-      return -1;
-    DO_PROFILE_INC(writecache_direct_writes);
-    return total_written + n;
-  }
-
-  /*
-   * If there's any data left, cache it.
-   */
-
-  if (n) {
+                               write_path, fsp->fh->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
+                               (double)wcp->offset, (unsigned int)wcp->data_size ));
+
+                       flush_write_cache(fsp, WRITE_FLUSH);
+               }
+       }
+
+       /*
+        * If the write request is bigger than the cache
+        * size, write it all out.
+        */
+
+       if (n > wcp->alloc_size ) {
+               ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
+               if (ret == -1) {
+                       return -1;
+               }
+
+               if (pos + ret > wcp->file_size) {
+                       wcp->file_size = pos + n;
+               }
+
+               DO_PROFILE_INC(writecache_direct_writes);
+               return total_written + n;
+       }
+
+       /*
+        * If there's any data left, cache it.
+        */
+
+       if (n) {
 #ifdef WITH_PROFILE
-    if (wcp->data_size) {
-      DO_PROFILE_INC(writecache_abutted_writes);
-    } else {
-      DO_PROFILE_INC(writecache_init_writes);
-    }
+               if (wcp->data_size) {
+                       DO_PROFILE_INC(writecache_abutted_writes);
+               } else {
+                       DO_PROFILE_INC(writecache_init_writes);
+               }
 #endif
-    memcpy(wcp->data+wcp->data_size, data, n);
-    if (wcp->data_size == 0) {
-      wcp->offset = pos;
-      DO_PROFILE_INC(writecache_num_write_caches);
-    }
-    wcp->data_size += n;
-    DEBUG(9,("cache return %u\n", (unsigned int)n));
-    total_written += n;
-    return total_written; /* .... that's a write :) */
-  }
+               memcpy(wcp->data+wcp->data_size, data, n);
+               if (wcp->data_size == 0) {
+                       wcp->offset = pos;
+                       DO_PROFILE_INC(writecache_num_write_caches);
+               }
+               wcp->data_size += n;
+
+               /*
+                * Update the file size if changed.
+                */
+
+               if (wcp->offset + wcp->data_size > wcp->file_size) {
+                       if (wcp_file_size_change(fsp) == -1) {
+                               return -1;
+                       }
+               }
+               DEBUG(9,("wcp->offset = %.0f wcp->data_size = %u cache return %u\n",
+                       (double)wcp->offset, (unsigned int)wcp->data_size, (unsigned int)n));
+
+               total_written += n;
+               return total_written; /* .... that's a write :) */
+       }
   
-  return total_written;
+       return total_written;
 }
 
 /****************************************************************************
@@ -549,67 +789,70 @@ n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
 
 void delete_write_cache(files_struct *fsp)
 {
-  write_cache *wcp;
+       write_cache *wcp;
 
-  if(!fsp)
-    return;
+       if(!fsp) { /* No file structure was passed: nothing to delete. */
+               return;
+       }
 
-  if(!(wcp = fsp->wcp))
-    return;
+       if(!(wcp = fsp->wcp)) { /* This file has no write cache attached. */
+               return;
+       }
 
-  DO_PROFILE_DEC(writecache_allocated_write_caches);
-  allocated_write_caches--;
+       DO_PROFILE_DEC(writecache_allocated_write_caches);
+       allocated_write_caches--; /* Counter that setup_write_cache() checks against MAX_WRITE_CACHES. */
 
-  SMB_ASSERT(wcp->data_size == 0);
+       SMB_ASSERT(wcp->data_size == 0); /* The cache is expected to hold no unwritten data here. */
 
-  free(wcp->data);
-  free(wcp);
-
-  fsp->wcp = NULL;
-
-  DEBUG(10,("delete_write_cache: File %s deleted write cache\n", fsp->fsp_name ));
+       SAFE_FREE(wcp->data); /* Release the data buffer first... */
+       SAFE_FREE(fsp->wcp); /* ...then the cache struct; presumably SAFE_FREE also NULLs fsp->wcp - TODO confirm. */
 
+       DEBUG(10,("delete_write_cache: File %s deleted write cache\n", fsp->fsp_name ));
 }
 
 /****************************************************************************
  Setup the write cache structure.
 ****************************************************************************/
 
-static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
+static bool setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
 {
-  ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn));
-  write_cache *wcp;
-
-  if (allocated_write_caches >= MAX_WRITE_CACHES) 
-       return False;
-
-  if(alloc_size == 0 || fsp->wcp)
-    return False;
-
-  if((wcp = (write_cache *)malloc(sizeof(write_cache))) == NULL) {
-    DEBUG(0,("setup_write_cache: malloc fail.\n"));
-    return False;
-  }
-
-  wcp->file_size = file_size;
-  wcp->offset = 0;
-  wcp->alloc_size = alloc_size;
-  wcp->data_size = 0;
-  if((wcp->data = malloc(wcp->alloc_size)) == NULL) {
-    DEBUG(0,("setup_write_cache: malloc fail for buffer size %u.\n",
-          (unsigned int)wcp->alloc_size ));
-    free(wcp);
-    return False;
-  }
-
-  fsp->wcp = wcp;
-  DO_PROFILE_INC(writecache_allocated_write_caches);
-  allocated_write_caches++;
-
-  DEBUG(10,("setup_write_cache: File %s allocated write cache size %u\n",
-               fsp->fsp_name, wcp->alloc_size ));
-
-  return True;
+       ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn)); /* presumably the configured cache size for this connection's service - confirm */
+       write_cache *wcp;
+
+       if (allocated_write_caches >= MAX_WRITE_CACHES) { /* Global cap on live write caches reached. */
+               return False;
+       }
+
+       if(alloc_size == 0 || fsp->wcp) { /* Cache size of 0, or this file already has a cache. */
+               return False;
+       }
+
+       if((wcp = SMB_MALLOC_P(write_cache)) == NULL) {
+               DEBUG(0,("setup_write_cache: malloc fail.\n"));
+               return False;
+       }
+
+       wcp->file_size = file_size; /* Caller-supplied size of the file. */
+       wcp->offset = 0; /* The cache starts empty (data_size 0) at offset 0. */
+       wcp->alloc_size = alloc_size;
+       wcp->data_size = 0;
+       if((wcp->data = (char *)SMB_MALLOC(wcp->alloc_size)) == NULL) {
+               DEBUG(0,("setup_write_cache: malloc fail for buffer size %u.\n",
+                       (unsigned int)wcp->alloc_size ));
+               SAFE_FREE(wcp); /* Do not leak the struct when the buffer allocation fails. */
+               return False;
+       }
+
+       memset(wcp->data, '\0', wcp->alloc_size ); /* Zero the whole buffer before first use. */
+
+       fsp->wcp = wcp; /* Attach the cache to the file and count it globally. */
+       DO_PROFILE_INC(writecache_allocated_write_caches);
+       allocated_write_caches++;
+
+       DEBUG(10,("setup_write_cache: File %s allocated write cache size %lu\n",
+               fsp->fsp_name, (unsigned long)wcp->alloc_size ));
+
+       return True; /* True only when a new cache was attached to fsp. */
 }
 
 /****************************************************************************
@@ -618,10 +861,18 @@ static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
 
 void set_filelen_write_cache(files_struct *fsp, SMB_OFF_T file_size)
 {
-  if(fsp->wcp) {
-    flush_write_cache(fsp, SIZECHANGE_FLUSH);
-    fsp->wcp->file_size = file_size;
-  }
+       if(fsp->wcp) {
+               /* The cache *must* have been flushed before we do this. */
+               if (fsp->wcp->data_size != 0) {
+                       char *msg;
+                       asprintf(&msg, "set_filelen_write_cache: size change "
+                                "on file %s with write cache size = %lu\n",
+                                fsp->fsp_name,
+                                (unsigned long)fsp->wcp->data_size);
+                       smb_panic(msg);
+               }
+               fsp->wcp->file_size = file_size;
+       }
 }
 
 /*******************************************************************
@@ -630,37 +881,73 @@ void set_filelen_write_cache(files_struct *fsp, SMB_OFF_T file_size)
 
 ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
 {
-  write_cache *wcp = fsp->wcp;
-  size_t data_size;
+       write_cache *wcp = fsp->wcp;
+       size_t data_size;
+       ssize_t ret;
 
-  if(!wcp || !wcp->data_size)
-    return 0;
+       if(!wcp || !wcp->data_size) { /* No cache, or nothing cached: report 0 bytes written. */
+               return 0;
+       }
 
-  data_size = wcp->data_size;
-  wcp->data_size = 0;
+       data_size = wcp->data_size; /* Remember how much to write out... */
+       wcp->data_size = 0; /* ...and mark the cache empty up front; it is not restored if the write below fails. */
 
-  DO_PROFILE_DEC_INC(writecache_num_write_caches,writecache_flushed_writes[reason]);
+       DO_PROFILE_DEC_INC(writecache_num_write_caches,writecache_flushed_writes[reason]);
 
-  DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
-          fsp->fd, (double)wcp->offset, (unsigned int)data_size));
+       DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
+               fsp->fh->fd, (double)wcp->offset, (unsigned int)data_size));
 
 #ifdef WITH_PROFILE
-  if(data_size == wcp->alloc_size)
-    DO_PROFILE_INC(writecache_num_perfect_writes);
+       if(data_size == wcp->alloc_size) { /* The cache was completely full when flushed. */
+               DO_PROFILE_INC(writecache_num_perfect_writes);
+       }
 #endif
 
-  return real_write_file(fsp, wcp->data, wcp->offset, data_size);
+       ret = real_write_file(NULL, fsp, wcp->data, wcp->offset, data_size);
+
+       /*
+        * Ensure the cached file size is kept up to date if the write extends the file.
+        */
+
+       if ((ret != -1) && (wcp->offset + ret > wcp->file_size)) {
+               wcp->file_size = wcp->offset + ret;
+       }
+
+       return ret; /* Result of real_write_file(); -1 is treated above as failure. */
 }
 
 /*******************************************************************
 sync a file
 ********************************************************************/
 
-void sync_file(connection_struct *conn, files_struct *fsp)
+NTSTATUS sync_file(connection_struct *conn, files_struct *fsp, bool write_through)
+{
+               if (fsp->fh->fd == -1) /* No open descriptor on this file: refuse to sync. */
+               return NT_STATUS_INVALID_HANDLE;
+
+       if (lp_strict_sync(SNUM(conn)) && /* Sync only when strict sync is on AND... */
+           (lp_syncalways(SNUM(conn)) || write_through)) { /* ...sync-always is set or the caller asked for write-through. */
+               int ret = flush_write_cache(fsp, SYNC_FLUSH); /* Write out any cached data first. */
+               if (ret == -1) {
+                       return map_nt_error_from_unix(errno);
+               }
+               ret = SMB_VFS_FSYNC(fsp); /* Then fsync through the VFS layer. */
+               if (ret == -1) {
+                       return map_nt_error_from_unix(errno);
+               }
+       }
+       return NT_STATUS_OK; /* Also returned when the settings did not require a sync. */
+}
+
+/************************************************************
+ Perform a stat whether a valid fd or not.
+************************************************************/
+
+int fsp_stat(files_struct *fsp, SMB_STRUCT_STAT *pst)
 {
-    if(lp_strict_sync(SNUM(conn)) && fsp->fd != -1) {
-      flush_write_cache(fsp, SYNC_FLUSH);
-      conn->vfs_ops.fsync(fsp,fsp->fd);
-    }
+       if (fsp->fh->fd == -1) { /* No open descriptor: stat by path name instead. */
+               return SMB_VFS_STAT(fsp->conn, fsp->fsp_name, pst);
+       } else { /* Descriptor available: stat through the open handle. */
+               return SMB_VFS_FSTAT(fsp, pst);
+       }
 }
-#undef OLD_NTDOMAIN