s3: vfs: Use the new smb_vfs_fsync_sync() call in place of SMB_VFS_FSYNC().
[samba.git] / source3 / smbd / fileio.c
index e0945be8893e220031b13ce91899929ecf478aff..7b17889b55c3e2fe00e0107277956708a1ac0efb 100644 (file)
@@ -1,36 +1,47 @@
-/* 
+/*
    Unix SMB/Netbios implementation.
    Version 1.9.
    read/write to a files_struct
    Copyright (C) Andrew Tridgell 1992-1998
    Copyright (C) Jeremy Allison 2000-2002. - write cache.
-   
+
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
+   the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.
-   
+
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
-   
+
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include "includes.h"
+#include "printing.h"
+#include "smbd/smbd.h"
+#include "smbd/globals.h"
+#include "smbprofile.h"
+
+struct write_cache {
+       off_t file_size;
+       off_t offset;
+       size_t alloc_size;
+       size_t data_size;
+       char *data;
+};
 
-static BOOL setup_write_cache(files_struct *, SMB_OFF_T);
+static bool setup_write_cache(files_struct *, off_t);
 
 /****************************************************************************
  Read from write cache if we can.
 ****************************************************************************/
 
-static BOOL read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
+static bool read_from_write_cache(files_struct *fsp,char *data,off_t pos,size_t n)
 {
-       write_cache *wcp = fsp->wcp;
+       struct write_cache *wcp = fsp->wcp;
 
        if(!wcp) {
                return False;
@@ -42,7 +53,7 @@ static BOOL read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,siz
 
        memcpy(data, wcp->data + (pos - wcp->offset), n);
 
-       DO_PROFILE_INC(writecache_read_hits);
+       DO_PROFILE_INC(writecache_cached_reads);
 
        return True;
 }
@@ -51,12 +62,13 @@ static BOOL read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,siz
  Read from a file.
 ****************************************************************************/
 
-ssize_t read_file(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
+ssize_t read_file(files_struct *fsp,char *data,off_t pos,size_t n)
 {
-       ssize_t ret=0,readret;
+       ssize_t ret = 0;
 
        /* you can't read from print files */
        if (fsp->print_file) {
+               errno = EBADF;
                return -1;
        }
 
@@ -70,39 +82,20 @@ ssize_t read_file(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
                return n;
        }
 
-       flush_write_cache(fsp, READ_FLUSH);
+       flush_write_cache(fsp, SAMBA_READ_FLUSH);
 
        fsp->fh->pos = pos;
 
        if (n > 0) {
-#ifdef DMF_FIX
-               int numretries = 3;
-tryagain:
-               readret = SMB_VFS_PREAD(fsp,fsp->fh->fd,data,n,pos);
-
-               if (readret == -1) {
-                       if ((errno == EAGAIN) && numretries) {
-                               DEBUG(3,("read_file EAGAIN retry in 10 seconds\n"));
-                               (void)sleep(10);
-                               --numretries;
-                               goto tryagain;
-                       }
-                       return -1;
-               }
-#else /* NO DMF fix. */
-               readret = SMB_VFS_PREAD(fsp,fsp->fh->fd,data,n,pos);
+               ret = SMB_VFS_PREAD(fsp,data,n,pos);
 
-               if (readret == -1) {
+               if (ret == -1) {
                        return -1;
                }
-#endif
-               if (readret > 0) {
-                       ret += readret;
-               }
        }
 
        DEBUG(10,("read_file (%s): pos = %.0f, size = %lu, returned %lu\n",
-               fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
+                 fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
 
        fsp->fh->pos += ret;
        fsp->fh->position_information = fsp->fh->pos;
@@ -110,55 +103,37 @@ tryagain:
        return(ret);
 }
 
-/* how many write cache buffers have been allocated */
-static unsigned int allocated_write_caches;
-
 /****************************************************************************
  *Really* write to a file.
 ****************************************************************************/
 
-static ssize_t real_write_file(files_struct *fsp,const char *data, SMB_OFF_T pos, size_t n)
+static ssize_t real_write_file(struct smb_request *req,
+                               files_struct *fsp,
+                               const char *data,
+                               off_t pos,
+                               size_t n)
 {
        ssize_t ret;
 
         if (pos == -1) {
-                ret = vfs_write_data(fsp, data, n);
+                ret = vfs_write_data(req, fsp, data, n);
         } else {
                fsp->fh->pos = pos;
-               if (pos && lp_strict_allocate(SNUM(fsp->conn))) {
+               if (pos && lp_strict_allocate(SNUM(fsp->conn) &&
+                               !fsp->is_sparse)) {
                        if (vfs_fill_sparse(fsp, pos) == -1) {
                                return -1;
                        }
                }
-                ret = vfs_pwrite_data(fsp, data, n, pos);
+                ret = vfs_pwrite_data(req, fsp, data, n, pos);
        }
 
        DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
-               fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
+                 fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
 
        if (ret != -1) {
                fsp->fh->pos += ret;
 
-               /*
-                * It turns out that setting the last write time from a Windows
-                * client stops any subsequent writes from updating the write time.
-                * Doing this after the write gives a race condition here where
-                * a stat may see the changed write time before we reset it here,
-                * but it's cheaper than having to store the write time in shared
-                * memory and look it up using dev/inode across all running smbd's.
-                * The 99% solution will hopefully be good enough in this case. JRA.
-                */
-
-               if (fsp->pending_modtime) {
-                       set_filetime(fsp->conn, fsp->fsp_name, fsp->pending_modtime);
-
-                       /* If we didn't get the "set modtime" call ourselves, we must
-                          store the last write time to restore on close. JRA. */
-                       if (!fsp->pending_modtime_owner) {
-                               fsp->last_write_time = time(NULL);
-                       }
-               }
-
 /* Yes - this is correct - writes don't update this. JRA. */
 /* Found by Samba4 tests. */
 #if 0
@@ -177,39 +152,178 @@ static ssize_t real_write_file(files_struct *fsp,const char *data, SMB_OFF_T pos
 static int wcp_file_size_change(files_struct *fsp)
 {
        int ret;
-       write_cache *wcp = fsp->wcp;
+       struct write_cache *wcp = fsp->wcp;
 
        wcp->file_size = wcp->offset + wcp->data_size;
-       ret = SMB_VFS_FTRUNCATE(fsp, fsp->fh->fd, wcp->file_size);
+       ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
        if (ret == -1) {
-               DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f error %s\n",
-                       fsp->fsp_name, (double)wcp->file_size, strerror(errno) ));
+               DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f "
+                        "error %s\n", fsp_str_dbg(fsp),
+                        (double)wcp->file_size, strerror(errno)));
        }
        return ret;
 }
 
+void update_write_time_handler(struct tevent_context *ctx,
+                                     struct tevent_timer *te,
+                                     struct timeval now,
+                                     void *private_data)
+{
+       files_struct *fsp = (files_struct *)private_data;
+
+       DEBUG(5, ("Update write time on %s\n", fsp_str_dbg(fsp)));
+
+       /* change the write time in the open file db. */
+       (void)set_write_time(fsp->file_id, timespec_current());
+
+       /* And notify. */
+        notify_fname(fsp->conn, NOTIFY_ACTION_MODIFIED,
+                     FILE_NOTIFY_CHANGE_LAST_WRITE, fsp->fsp_name->base_name);
+
+       /* Remove the timed event handler. */
+       TALLOC_FREE(fsp->update_write_time_event);
+}
+
+/*********************************************************
+ Schedule a write time update for WRITE_TIME_UPDATE_USEC_DELAY
+ in the future.
+*********************************************************/
+
+void trigger_write_time_update(struct files_struct *fsp)
+{
+       int delay;
+
+       if (fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) {
+               /* Don't use delayed writes on POSIX files. */
+               return;
+       }
+
+       if (fsp->write_time_forced) {
+               /* No point - "sticky" write times
+                * in effect.
+                */
+               return;
+       }
+
+       /* We need to remember someone did a write
+        * and update to current time on close. */
+
+       fsp->update_write_time_on_close = true;
+
+       if (fsp->update_write_time_triggered) {
+               /*
+                * We only update the write time after 2 seconds
+                * on the first normal write. After that
+                * no other writes affect this until close.
+                */
+               return;
+       }
+       fsp->update_write_time_triggered = true;
+
+       delay = lp_parm_int(SNUM(fsp->conn),
+                           "smbd", "writetimeupdatedelay",
+                           WRITE_TIME_UPDATE_USEC_DELAY);
+
+       DEBUG(5, ("Update write time %d usec later on %s\n",
+                 delay, fsp_str_dbg(fsp)));
+
+       /* trigger the update "delay" usec later (nominally 2 seconds) */
+       fsp->update_write_time_event =
+               tevent_add_timer(fsp->conn->sconn->ev_ctx, NULL,
+                                timeval_current_ofs_usec(delay),
+                                update_write_time_handler, fsp);
+}
+
+void trigger_write_time_update_immediate(struct files_struct *fsp)
+{
+       struct smb_file_time ft;
+
+       if (fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) {
+               /* Don't use delayed writes on POSIX files. */
+               return;
+       }
+
+        if (fsp->write_time_forced) {
+               /*
+                * No point - "sticky" write times
+                * in effect.
+                */
+                return;
+        }
+
+       TALLOC_FREE(fsp->update_write_time_event);
+       DEBUG(5, ("Update write time immediate on %s\n",
+                 fsp_str_dbg(fsp)));
+
+       /* After an immediate update, mark the delayed update as already triggered. */
+       fsp->update_write_time_triggered = true;
+        fsp->update_write_time_on_close = false;
+
+       ZERO_STRUCT(ft);
+       ft.mtime = timespec_current();
+
+       /* Update the time in the open file db. */
+       (void)set_write_time(fsp->file_id, ft.mtime);
+
+       /* Now set on disk - takes care of notify. */
+       (void)smb_set_file_time(fsp->conn, fsp, fsp->fsp_name, &ft, false);
+}
+
+void mark_file_modified(files_struct *fsp)
+{
+       int dosmode;
+
+       if (fsp->modified) {
+               return;
+       }
+
+       fsp->modified = true;
+
+       if (SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st) != 0) {
+               return;
+       }
+       trigger_write_time_update(fsp);
+
+       if (fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) {
+               return;
+       }
+       if (!(lp_store_dos_attributes(SNUM(fsp->conn)) ||
+             MAP_ARCHIVE(fsp->conn))) {
+               return;
+       }
+
+       dosmode = dos_mode(fsp->conn, fsp->fsp_name);
+       if (IS_DOS_ARCHIVE(dosmode)) {
+               return;
+       }
+       file_set_dosmode(fsp->conn, fsp->fsp_name,
+                        dosmode | FILE_ATTRIBUTE_ARCHIVE, NULL, false);
+}
+
 /****************************************************************************
  Write to a file.
 ****************************************************************************/
 
-ssize_t write_file(files_struct *fsp, const char *data, SMB_OFF_T pos, size_t n)
+ssize_t write_file(struct smb_request *req,
+                       files_struct *fsp,
+                       const char *data,
+                       off_t pos,
+                       size_t n)
 {
-       write_cache *wcp = fsp->wcp;
+       struct write_cache *wcp = fsp->wcp;
        ssize_t total_written = 0;
-       int write_path = -1; 
+       int write_path = -1;
 
        if (fsp->print_file) {
-               fstring sharename;
-               uint32 jobid;
+               uint32_t t;
+               int ret;
 
-               if (!rap_to_pjobid(fsp->rap_print_jobid, sharename, &jobid)) {
-                       DEBUG(3,("write_file: Unable to map RAP jobid %u to jobid.\n",
-                                               (unsigned int)fsp->rap_print_jobid ));
-                       errno = EBADF;
+               ret = print_spool_write(fsp, data, n, pos, &t);
+               if (ret) {
+                       errno = ret;
                        return -1;
                }
-
-               return print_job_write(SNUM(fsp->conn), jobid, data, pos, n);
+               return t;
        }
 
        if (!fsp->can_write) {
@@ -217,34 +331,29 @@ ssize_t write_file(files_struct *fsp, const char *data, SMB_OFF_T pos, size_t n)
                return -1;
        }
 
-       if (!fsp->modified) {
-               SMB_STRUCT_STAT st;
-               fsp->modified = True;
-
-               if (SMB_VFS_FSTAT(fsp,fsp->fh->fd,&st) == 0) {
-                       int dosmode = dos_mode(fsp->conn,fsp->fsp_name,&st);
-                       if ((lp_store_dos_attributes(SNUM(fsp->conn)) || MAP_ARCHIVE(fsp->conn)) && !IS_DOS_ARCHIVE(dosmode)) {
-                               file_set_dosmode(fsp->conn,fsp->fsp_name,dosmode | aARCH,&st, False);
-                       }
-
-                       /*
-                        * If this is the first write and we have an exclusive oplock then setup
-                        * the write cache.
-                        */
+       /*
+        * If this is the first write and we have an exclusive oplock
+        * then setup the write cache.
+        */
 
-                       if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && !wcp) {
-                               setup_write_cache(fsp, st.st_size);
-                               wcp = fsp->wcp;
-                       } 
-               }  
+       if (!fsp->modified &&
+           EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) &&
+           (wcp == NULL)) {
+               /*
+                * Note: no write cache with leases!
+                * as the handles would have to share the write cache
+                * that's possible but an improvement for another day...
+                */
+               setup_write_cache(fsp, fsp->fsp_name->st.st_ex_size);
+               wcp = fsp->wcp;
        }
 
-#ifdef WITH_PROFILE
+       mark_file_modified(fsp);
+
        DO_PROFILE_INC(writecache_total_writes);
        if (!fsp->oplock_type) {
                DO_PROFILE_INC(writecache_non_oplock_writes);
        }
-#endif
 
        /*
         * If this file is level II oplocked then we need
@@ -254,54 +363,74 @@ ssize_t write_file(files_struct *fsp, const char *data, SMB_OFF_T pos, size_t n)
         * the shared memory area whilst doing this.
         */
 
-       release_level_2_oplocks_on_change(fsp);
-
-#ifdef WITH_PROFILE
-       if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
-               DEBUG(3,("WRITECACHE: initwrites=%u abutted=%u total=%u \
-nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
-                       profile_p->writecache_init_writes,
-                       profile_p->writecache_abutted_writes,
-                       profile_p->writecache_total_writes,
-                       profile_p->writecache_non_oplock_writes,
-                       profile_p->writecache_allocated_write_caches,
-                       profile_p->writecache_num_write_caches,
-                       profile_p->writecache_direct_writes,
-                       profile_p->writecache_num_perfect_writes,
-                       profile_p->writecache_read_hits ));
-
-               DEBUG(3,("WRITECACHE: Flushes SEEK=%d, READ=%d, WRITE=%d, READRAW=%d, OPLOCK=%d, CLOSE=%d, SYNC=%d\n",
-                       profile_p->writecache_flushed_writes[SEEK_FLUSH],
-                       profile_p->writecache_flushed_writes[READ_FLUSH],
-                       profile_p->writecache_flushed_writes[WRITE_FLUSH],
-                       profile_p->writecache_flushed_writes[READRAW_FLUSH],
-                       profile_p->writecache_flushed_writes[OPLOCK_RELEASE_FLUSH],
-                       profile_p->writecache_flushed_writes[CLOSE_FLUSH],
-                       profile_p->writecache_flushed_writes[SYNC_FLUSH] ));
+       /* This should actually be improved to span the write. */
+       contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
+       contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
+
+       if (wcp && req->unread_bytes) {
+               /* If we're using receivefile don't
+                * deal with a write cache.
+                */
+               flush_write_cache(fsp, SAMBA_WRITE_FLUSH);
+               delete_write_cache(fsp);
+               wcp = NULL;
        }
-#endif
 
        if(!wcp) {
                DO_PROFILE_INC(writecache_direct_writes);
-               total_written = real_write_file(fsp, data, pos, n);
+               total_written = real_write_file(req, fsp, data, pos, n);
                return total_written;
        }
 
-       DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f wcp->data_size=%u\n",
-               fsp->fsp_name, fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size));
+       DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f "
+                "wcp->data_size=%u\n", fsp_str_dbg(fsp), fsp->fh->fd,
+                (double)pos, (unsigned int)n, (double)wcp->offset,
+                (unsigned int)wcp->data_size));
 
        fsp->fh->pos = pos + n;
 
-       /* 
+       if ((n == 1) && (data[0] == '\0') && (pos > wcp->file_size)) {
+               int ret;
+
+               /*
+                * This is a 1-byte write of a 0 beyond the EOF and
+                * thus implicitly also beyond the current active
+                * write cache, the typical file-extending (and
+                * allocating, but we're using the write cache here)
+                * write done by Windows. We just have to ftruncate
+                * the file and rely on posix semantics to return
+                * zeros for non-written file data that is within the
+                * file length.
+                *
+                * We cannot use wcp_file_size_change here because we
+                * might have an existing write cache, and
+                * wcp_file_size_change assumes a change to just the
+                * end of the current write cache.
+                */
+
+               wcp->file_size = pos + 1;
+               ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
+               if (ret == -1) {
+                       DEBUG(0, ("wcp_file_size_change (%s): ftruncate of "
+                                 "size %.0f error %s\n", fsp_str_dbg(fsp),
+                                 (double)wcp->file_size, strerror(errno)));
+                       return -1;
+               }
+               return 1;
+       }
+
+
+       /*
         * If we have active cache and it isn't contiguous then we flush.
         * NOTE: There is a small problem with running out of disk ....
         */
 
        if (wcp->data_size) {
-               BOOL cache_flush_needed = False;
+               bool cache_flush_needed = False;
+
+               if ((pos >= wcp->offset) &&
+                   (pos <= wcp->offset + wcp->data_size)) {
 
-               if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
-      
                        /* ASCII art.... JRA.
 
       +--------------+-----
@@ -318,9 +447,13 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
                         * Start of write overlaps or abutts the existing data.
                         */
 
-                       size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
+                       size_t data_used;
 
-                       memcpy(wcp->data + (pos - wcp->offset), data, data_used);
+                       data_used = MIN((wcp->alloc_size - (pos - wcp->offset)),
+                                       n);
+
+                       memcpy(wcp->data + (pos - wcp->offset), data,
+                              data_used);
 
                        /*
                         * Update the current buffer size with the new data.
@@ -364,8 +497,9 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
 
                        write_path = 1;
 
-               } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
-                                       (pos + n <= wcp->offset + wcp->alloc_size)) {
+               } else if ((pos < wcp->offset) &&
+                          (pos + n > wcp->offset) &&
+                          (pos + n <= wcp->offset + wcp->alloc_size)) {
 
                        /* ASCII art.... JRA.
 
@@ -423,10 +557,10 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
 
                        write_path = 2;
 
-               } else if ( (pos >= wcp->file_size) && 
-                                       (wcp->offset + wcp->data_size == wcp->file_size) &&
-                                       (pos > wcp->offset + wcp->data_size) && 
-                                       (pos < wcp->offset + wcp->alloc_size) ) {
+               } else if ((pos >= wcp->file_size) &&
+                          (wcp->offset + wcp->data_size == wcp->file_size) &&
+                          (pos > wcp->offset + wcp->data_size) &&
+                          (pos < wcp->offset + wcp->alloc_size) ) {
 
                        /* ASCII art.... JRA.
 
@@ -454,7 +588,7 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
                        if(pos + n <= wcp->offset + wcp->alloc_size) {
                                data_used = n;
                        } else {
-                               data_used = wcp->offset + wcp->alloc_size - pos;
+                               data_used = wcp->offset+wcp->alloc_size-pos;
                        }
 
                        /*
@@ -464,7 +598,8 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
                        memset(wcp->data + wcp->data_size, '\0',
                                pos - (wcp->offset + wcp->data_size) );
 
-                       memcpy(wcp->data + (pos - wcp->offset), data, data_used);
+                       memcpy(wcp->data + (pos - wcp->offset), data,
+                              data_used);
 
                        /*
                         * Update the current buffer size with the new data.
@@ -509,29 +644,40 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
 
                        write_path = 3;
 
-                } else if ( (pos >= wcp->file_size) && 
+                } else if ( (pos >= wcp->file_size) &&
                            (n == 1) &&
-                           (pos < wcp->offset + 2*wcp->alloc_size) &&
-                           (wcp->file_size == wcp->offset + wcp->data_size)) {
+                           (wcp->file_size == wcp->offset + wcp->data_size) &&
+                           (pos < wcp->file_size + wcp->alloc_size)) {
 
                         /*
-                        +---------------+
-                        | Cached data   |
-                        +---------------+
+
+                End of file ---->|
+
+                 +---------------+---------------+
+                 | Cached data   | Cache buffer  |
+                 +---------------+---------------+
+
+                                 |<------- allocated size ---------------->|
 
                                                          +--------+
                                                          | 1 Byte |
                                                          +--------+
 
-                       MS-Office seems to do this a lot to determine if there's enough
-                       space on the filesystem to write a new file.
-                        */
+                       MS-Office seems to do this a lot to determine if
+                       there's enough space on the filesystem to write a new
+                       file.
+
+                       Change to :
 
-                       SMB_BIG_UINT new_start = wcp->offset + wcp->data_size;
+                End of file ---->|
+                                 +-----------------------+--------+
+                                 | Zeroed Cached data    | 1 Byte |
+                                 +-----------------------+--------+
+                        */
 
-                       flush_write_cache(fsp, WRITE_FLUSH);
-                       wcp->offset = new_start;
-                       wcp->data_size = pos - new_start + 1;
+                       flush_write_cache(fsp, SAMBA_WRITE_FLUSH);
+                       wcp->offset = wcp->file_size;
+                       wcp->data_size = pos - wcp->file_size + 1;
                        memset(wcp->data, '\0', wcp->data_size);
                        memcpy(wcp->data + wcp->data_size-1, data, 1);
 
@@ -557,9 +703,9 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
                         | Cached data   | Cache buffer  |
                         +---------------+---------------+
 
-                                                              +-------------------+
-                                                              | Data to write     |
-                                                              +-------------------+
+                                                              +---------------+
+                                                              | Data to write |
+                                                              +---------------+
 
    Case 2).
 
@@ -584,35 +730,46 @@ nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
                  */
 
                        /*
-                        * Write is bigger than buffer, or there is no overlap on the
-                        * low or high ends.
+                        * Write is bigger than buffer, or there is no
+                        * overlap on the low or high ends.
                         */
 
-                       DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
-len = %u\n",fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
+                       DEBUG(9,("write_file: non cacheable write : fd = %d, "
+                                "pos = %.0f, len = %u, "
+                                "current cache pos = %.0f len = %u\n",
+                                fsp->fh->fd, (double)pos, (unsigned int)n,
+                                (double)wcp->offset,
+                                (unsigned int)wcp->data_size ));
 
                        /*
-                        * If write would fit in the cache, and is larger than
-                        * the data already in the cache, flush the cache and
-                        * preferentially copy the data new data into it. Otherwise
-                        * just write the data directly.
+                        * If write would fit in the cache, and is
+                        * larger than the data already in the cache,
+                        * flush the cache and preferentially copy the
+                        * new data into it. Otherwise just write
+                        * the data directly.
                         */
 
                        if ( n <= wcp->alloc_size && n > wcp->data_size) {
                                cache_flush_needed = True;
                        } else {
-                               ssize_t ret = real_write_file(fsp, data, pos, n);
+                               ssize_t ret = real_write_file(NULL, fsp, data,
+                                                             pos, n);
 
                                /*
-                                * If the write overlaps the entire cache, then
-                                * discard the current contents of the cache.
-                                * Fix from Rasmus Borup Hansen rbh@math.ku.dk.
+                                * If the write overlaps the entire
+                                * cache, then discard the current
+                                * contents of the cache.  Fix from
+                                * Rasmus Borup Hansen rbh@math.ku.dk.
                                 */
 
                                if ((pos <= wcp->offset) &&
-                                               (pos + n >= wcp->offset + wcp->data_size) ) {
-                                       DEBUG(9,("write_file: discarding overwritten write \
-cache: fd = %d, off=%.0f, size=%u\n", fsp->fh->fd, (double)wcp->offset, (unsigned int)wcp->data_size ));
+                                   (pos + n >= wcp->offset+wcp->data_size)) {
+                                       DEBUG(9,("write_file: discarding "
+                                                "overwritten write cache: "
+                                                "fd = %d, off=%.0f, "
+                                                "size=%u\n", fsp->fh->fd,
+                                                (double)wcp->offset,
+                                                (unsigned)wcp->data_size));
                                        wcp->data_size = 0;
                                }
 
@@ -633,12 +790,16 @@ cache: fd = %d, off=%.0f, size=%u\n", fsp->fh->fd, (double)wcp->offset, (unsigne
                }
 
                if (cache_flush_needed) {
-                       DEBUG(3,("WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
-n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
-                               write_path, fsp->fh->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
-                               (double)wcp->offset, (unsigned int)wcp->data_size ));
-
-                       flush_write_cache(fsp, WRITE_FLUSH);
+                       DEBUG(3, ("SAMBA_WRITE_FLUSH:%d: due to noncontinuous "
+                                 "write: fd = %d, size = %.0f, pos = %.0f, "
+                                 "n = %u, wcp->offset=%.0f, "
+                                 "wcp->data_size=%u\n",
+                                 write_path, fsp->fh->fd,
+                                 (double)wcp->file_size, (double)pos,
+                                 (unsigned int)n, (double)wcp->offset,
+                                 (unsigned int)wcp->data_size ));
+
+                       flush_write_cache(fsp, SAMBA_WRITE_FLUSH);
                }
        }
 
@@ -648,7 +809,7 @@ n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
         */
 
        if (n > wcp->alloc_size ) {
-               ssize_t ret = real_write_file(fsp, data, pos, n);
+               ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
                if (ret == -1) {
                        return -1;
                }
@@ -666,17 +827,35 @@ n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
         */
 
        if (n) {
-#ifdef WITH_PROFILE
+               DO_PROFILE_INC(writecache_cached_writes);
                if (wcp->data_size) {
                        DO_PROFILE_INC(writecache_abutted_writes);
                } else {
                        DO_PROFILE_INC(writecache_init_writes);
                }
-#endif
+
+               if ((wcp->data_size == 0)
+                   && (pos > wcp->file_size)
+                   && (pos + n <= wcp->file_size + wcp->alloc_size)) {
+                       /*
+                        * This is a write completely beyond the
+                        * current EOF, but within reach of the write
+                        * cache. We expect fill-up writes pretty
+                        * soon, so it does not make sense to start
+                        * the write cache at the current
+                        * offset. These fill-up writes would trigger
+                        * separate pwrites or even unnecessary cache
+                        * flushes because they overlap if this is a
+                        * one-byte allocating write.
+                        */
+                       wcp->offset = wcp->file_size;
+                       wcp->data_size = pos - wcp->file_size;
+                       memset(wcp->data, 0, wcp->data_size);
+               }
+
                memcpy(wcp->data+wcp->data_size, data, n);
                if (wcp->data_size == 0) {
                        wcp->offset = pos;
-                       DO_PROFILE_INC(writecache_num_write_caches);
                }
                wcp->data_size += n;
 
@@ -689,13 +868,15 @@ n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
                                return -1;
                        }
                }
-               DEBUG(9,("wcp->offset = %.0f wcp->data_size = %u cache return %u\n",
-                       (double)wcp->offset, (unsigned int)wcp->data_size, (unsigned int)n));
+               DEBUG(9, ("wcp->offset = %.0f wcp->data_size = %u cache "
+                         "return %u\n",
+                         (double)wcp->offset, (unsigned int)wcp->data_size,
+                         (unsigned int)n));
 
                total_written += n;
                return total_written; /* .... that's a write :) */
        }
-  
+
        return total_written;
 }
 
@@ -705,7 +886,7 @@ n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
 
 void delete_write_cache(files_struct *fsp)
 {
-       write_cache *wcp;
+       struct write_cache *wcp;
 
        if(!fsp) {
                return;
@@ -715,7 +896,7 @@ void delete_write_cache(files_struct *fsp)
                return;
        }
 
-       DO_PROFILE_DEC(writecache_allocated_write_caches);
+       DO_PROFILE_INC(writecache_deallocations);
        allocated_write_caches--;
 
        SMB_ASSERT(wcp->data_size == 0);
@@ -723,17 +904,18 @@ void delete_write_cache(files_struct *fsp)
        SAFE_FREE(wcp->data);
        SAFE_FREE(fsp->wcp);
 
-       DEBUG(10,("delete_write_cache: File %s deleted write cache\n", fsp->fsp_name ));
+       DEBUG(10,("delete_write_cache: File %s deleted write cache\n",
+                 fsp_str_dbg(fsp)));
 }
 
 /****************************************************************************
  Setup the write cache structure.
 ****************************************************************************/
 
-static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
+static bool setup_write_cache(files_struct *fsp, off_t file_size)
 {
        ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn));
-       write_cache *wcp;
+       struct write_cache *wcp;
 
        if (allocated_write_caches >= MAX_WRITE_CACHES) {
                return False;
@@ -743,7 +925,7 @@ static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
                return False;
        }
 
-       if((wcp = SMB_MALLOC_P(write_cache)) == NULL) {
+       if((wcp = SMB_MALLOC_P(struct write_cache)) == NULL) {
                DEBUG(0,("setup_write_cache: malloc fail.\n"));
                return False;
        }
@@ -762,11 +944,11 @@ static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
        memset(wcp->data, '\0', wcp->alloc_size );
 
        fsp->wcp = wcp;
-       DO_PROFILE_INC(writecache_allocated_write_caches);
+       DO_PROFILE_INC(writecache_allocations);
        allocated_write_caches++;
 
        DEBUG(10,("setup_write_cache: File %s allocated write cache size %lu\n",
-               fsp->fsp_name, (unsigned long)wcp->alloc_size ));
+                 fsp_str_dbg(fsp), (unsigned long)wcp->alloc_size));
 
        return True;
 }
@@ -775,15 +957,20 @@ static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
  Cope with a size change.
 ****************************************************************************/
 
-void set_filelen_write_cache(files_struct *fsp, SMB_OFF_T file_size)
+void set_filelen_write_cache(files_struct *fsp, off_t file_size)
 {
        if(fsp->wcp) {
                /* The cache *must* have been flushed before we do this. */
                if (fsp->wcp->data_size != 0) {
-                       pstring msg;
-                       slprintf(msg, sizeof(msg)-1, "set_filelen_write_cache: size change \
-on file %s with write cache size = %lu\n", fsp->fsp_name, (unsigned long)fsp->wcp->data_size );
-                       smb_panic(msg);
+                       char *msg;
+                       if (asprintf(&msg, "set_filelen_write_cache: size change "
+                                "on file %s with write cache size = %lu\n",
+                                fsp->fsp_name->base_name,
+                                (unsigned long)fsp->wcp->data_size) != -1) {
+                               smb_panic(msg);
+                       } else {
+                               smb_panic("set_filelen_write_cache");
+                       }
                }
                fsp->wcp->file_size = file_size;
        }
@@ -795,7 +982,7 @@ on file %s with write cache size = %lu\n", fsp->fsp_name, (unsigned long)fsp->wc
 
 ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
 {
-       write_cache *wcp = fsp->wcp;
+       struct write_cache *wcp = fsp->wcp;
        size_t data_size;
        ssize_t ret;
 
@@ -806,18 +993,43 @@ ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
        data_size = wcp->data_size;
        wcp->data_size = 0;
 
-       DO_PROFILE_DEC_INC(writecache_num_write_caches,writecache_flushed_writes[reason]);
+       switch (reason) {
+       case SAMBA_SEEK_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_seek);
+               break;
+       case SAMBA_READ_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_read);
+               break;
+       case SAMBA_WRITE_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_write);;
+               break;
+       case SAMBA_READRAW_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_readraw);
+               break;
+       case SAMBA_OPLOCK_RELEASE_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_oplock);
+               break;
+       case SAMBA_CLOSE_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_close);
+               break;
+       case SAMBA_SYNC_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_sync);
+               break;
+       case SAMBA_SIZECHANGE_FLUSH:
+               DO_PROFILE_INC(writecache_flush_reason_sizechange);
+               break;
+       default:
+               break;
+       }
 
        DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
                fsp->fh->fd, (double)wcp->offset, (unsigned int)data_size));
 
-#ifdef WITH_PROFILE
        if(data_size == wcp->alloc_size) {
-               DO_PROFILE_INC(writecache_num_perfect_writes);
+               DO_PROFILE_INC(writecache_perfect_writes);
        }
-#endif
 
-       ret = real_write_file(fsp, wcp->data, wcp->offset, data_size);
+       ret = real_write_file(NULL, fsp, wcp->data, wcp->offset, data_size);
 
        /*
         * Ensure file size if kept up to date if write extends file.
@@ -834,27 +1046,38 @@ ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
 sync a file
 ********************************************************************/
 
-void sync_file(connection_struct *conn, files_struct *fsp, BOOL write_through)
+/* If lp_strict_sync() and (lp_sync_always() or write_through): flush the write
+ * cache, then smb_vfs_fsync_sync(). A -1 from either maps errno to NTSTATUS. */
+NTSTATUS sync_file(connection_struct *conn, files_struct *fsp, bool write_through)
 {
                if (fsp->fh->fd == -1)
-               return;
+               return NT_STATUS_INVALID_HANDLE;
 
        if (lp_strict_sync(SNUM(conn)) &&
-           (lp_syncalways(SNUM(conn)) || write_through)) {
-               flush_write_cache(fsp, SYNC_FLUSH);
-               SMB_VFS_FSYNC(fsp,fsp->fh->fd);
+           (lp_sync_always(SNUM(conn)) || write_through)) {
+               if (flush_write_cache(fsp, SAMBA_SYNC_FLUSH) == -1) {
+                       return map_nt_error_from_unix(errno);
+               }
+               if (smb_vfs_fsync_sync(fsp) == -1) {
+                       return map_nt_error_from_unix(errno);
+               }
        }
+       return NT_STATUS_OK;
 }
 
 /************************************************************
  Perform a stat whether a valid fd or not.
 ************************************************************/
 
-int fsp_stat(files_struct *fsp, SMB_STRUCT_STAT *pst)
+int fsp_stat(files_struct *fsp)
 {
        if (fsp->fh->fd == -1) {
-               return SMB_VFS_STAT(fsp->conn, fsp->fsp_name, pst);
+               /* No fd: stat by name, via LSTAT when the POSIX-open flag is set. */
+               if (fsp->posix_flags & FSP_POSIX_FLAGS_OPEN) {
+                       return SMB_VFS_LSTAT(fsp->conn, fsp->fsp_name);
+               }
+               return SMB_VFS_STAT(fsp->conn, fsp->fsp_name);
        } else {
-               return SMB_VFS_FSTAT(fsp,fsp->fh->fd, pst);
+               return SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st);
        }
 }