s3:smbd: document the interaction between "smb2 leases" and "write cache size"
[samba.git] / source3 / smbd / fileio.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 1.9.
4    read/write to a files_struct
5    Copyright (C) Andrew Tridgell 1992-1998
6    Copyright (C) Jeremy Allison 2000-2002. - write cache.
7    
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include "includes.h"
23 #include "printing.h"
24 #include "smbd/smbd.h"
25 #include "smbd/globals.h"
26 #include "smbprofile.h"
27
/*
 * Per-file write cache state, reached through fsp->wcp.
 */
struct write_cache {
	/* File size as tracked by the write cache code. */
	off_t file_size;
	/* File offset that the first byte of data corresponds to. */
	off_t offset;
	/* Capacity of the data buffer in bytes. */
	size_t alloc_size;
	/* Number of bytes in data that currently hold cached file data. */
	size_t data_size;
	/* The cache buffer. */
	char *data;
};
35
36 static bool setup_write_cache(files_struct *, off_t);
37
38 /****************************************************************************
39  Read from write cache if we can.
40 ****************************************************************************/
41
42 static bool read_from_write_cache(files_struct *fsp,char *data,off_t pos,size_t n)
43 {
44         struct write_cache *wcp = fsp->wcp;
45
46         if(!wcp) {
47                 return False;
48         }
49
50         if( n > wcp->data_size || pos < wcp->offset || pos + n > wcp->offset + wcp->data_size) {
51                 return False;
52         }
53
54         memcpy(data, wcp->data + (pos - wcp->offset), n);
55
56         DO_PROFILE_INC(writecache_cached_reads);
57
58         return True;
59 }
60
61 /****************************************************************************
62  Read from a file.
63 ****************************************************************************/
64
65 ssize_t read_file(files_struct *fsp,char *data,off_t pos,size_t n)
66 {
67         ssize_t ret = 0;
68
69         /* you can't read from print files */
70         if (fsp->print_file) {
71                 errno = EBADF;
72                 return -1;
73         }
74
75         /*
76          * Serve from write cache if we can.
77          */
78
79         if(read_from_write_cache(fsp, data, pos, n)) {
80                 fsp->fh->pos = pos + n;
81                 fsp->fh->position_information = fsp->fh->pos;
82                 return n;
83         }
84
85         flush_write_cache(fsp, SAMBA_READ_FLUSH);
86
87         fsp->fh->pos = pos;
88
89         if (n > 0) {
90                 ret = SMB_VFS_PREAD(fsp,data,n,pos);
91
92                 if (ret == -1) {
93                         return -1;
94                 }
95         }
96
97         DEBUG(10,("read_file (%s): pos = %.0f, size = %lu, returned %lu\n",
98                   fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
99
100         fsp->fh->pos += ret;
101         fsp->fh->position_information = fsp->fh->pos;
102
103         return(ret);
104 }
105
106 /****************************************************************************
107  *Really* write to a file.
108 ****************************************************************************/
109
110 static ssize_t real_write_file(struct smb_request *req,
111                                 files_struct *fsp,
112                                 const char *data,
113                                 off_t pos,
114                                 size_t n)
115 {
116         ssize_t ret;
117
118         if (pos == -1) {
119                 ret = vfs_write_data(req, fsp, data, n);
120         } else {
121                 fsp->fh->pos = pos;
122                 if (pos && lp_strict_allocate(SNUM(fsp->conn) &&
123                                 !fsp->is_sparse)) {
124                         if (vfs_fill_sparse(fsp, pos) == -1) {
125                                 return -1;
126                         }
127                 }
128                 ret = vfs_pwrite_data(req, fsp, data, n, pos);
129         }
130
131         DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
132                   fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
133
134         if (ret != -1) {
135                 fsp->fh->pos += ret;
136
137 /* Yes - this is correct - writes don't update this. JRA. */
138 /* Found by Samba4 tests. */
139 #if 0
140                 fsp->position_information = fsp->pos;
141 #endif
142         }
143
144         return ret;
145 }
146
147 /****************************************************************************
148  File size cache change.
149  Updates size on disk but doesn't flush the cache.
150 ****************************************************************************/
151
152 static int wcp_file_size_change(files_struct *fsp)
153 {
154         int ret;
155         struct write_cache *wcp = fsp->wcp;
156
157         wcp->file_size = wcp->offset + wcp->data_size;
158         ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
159         if (ret == -1) {
160                 DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f "
161                          "error %s\n", fsp_str_dbg(fsp),
162                          (double)wcp->file_size, strerror(errno)));
163         }
164         return ret;
165 }
166
167 void update_write_time_handler(struct tevent_context *ctx,
168                                       struct tevent_timer *te,
169                                       struct timeval now,
170                                       void *private_data)
171 {
172         files_struct *fsp = (files_struct *)private_data;
173
174         DEBUG(5, ("Update write time on %s\n", fsp_str_dbg(fsp)));
175
176         /* change the write time in the open file db. */
177         (void)set_write_time(fsp->file_id, timespec_current());
178
179         /* And notify. */
180         notify_fname(fsp->conn, NOTIFY_ACTION_MODIFIED,
181                      FILE_NOTIFY_CHANGE_LAST_WRITE, fsp->fsp_name->base_name);
182
183         /* Remove the timed event handler. */
184         TALLOC_FREE(fsp->update_write_time_event);
185 }
186
187 /*********************************************************
188  Schedule a write time update for WRITE_TIME_UPDATE_USEC_DELAY
189  in the future.
190 *********************************************************/
191
192 void trigger_write_time_update(struct files_struct *fsp)
193 {
194         int delay;
195
196         if (fsp->posix_open) {
197                 /* Don't use delayed writes on POSIX files. */
198                 return;
199         }
200
201         if (fsp->write_time_forced) {
202                 /* No point - "sticky" write times
203                  * in effect.
204                  */
205                 return;
206         }
207
208         /* We need to remember someone did a write
209          * and update to current time on close. */
210
211         fsp->update_write_time_on_close = true;
212
213         if (fsp->update_write_time_triggered) {
214                 /*
215                  * We only update the write time after 2 seconds
216                  * on the first normal write. After that
217                  * no other writes affect this until close.
218                  */
219                 return;
220         }
221         fsp->update_write_time_triggered = true;
222
223         delay = lp_parm_int(SNUM(fsp->conn),
224                             "smbd", "writetimeupdatedelay",
225                             WRITE_TIME_UPDATE_USEC_DELAY);
226
227         DEBUG(5, ("Update write time %d usec later on %s\n",
228                   delay, fsp_str_dbg(fsp)));
229
230         /* trigger the update 2 seconds later */
231         fsp->update_write_time_event =
232                 tevent_add_timer(fsp->conn->sconn->ev_ctx, NULL,
233                                  timeval_current_ofs_usec(delay),
234                                  update_write_time_handler, fsp);
235 }
236
237 void trigger_write_time_update_immediate(struct files_struct *fsp)
238 {
239         struct smb_file_time ft;
240
241         if (fsp->posix_open) {
242                 /* Don't use delayed writes on POSIX files. */
243                 return;
244         }
245
246         if (fsp->write_time_forced) {
247                 /*
248                  * No point - "sticky" write times
249                  * in effect.
250                  */
251                 return;
252         }
253
254         TALLOC_FREE(fsp->update_write_time_event);
255         DEBUG(5, ("Update write time immediate on %s\n",
256                   fsp_str_dbg(fsp)));
257
258         /* After an immediate update, reset the trigger. */
259         fsp->update_write_time_triggered = true;
260         fsp->update_write_time_on_close = false;
261
262         ZERO_STRUCT(ft);
263         ft.mtime = timespec_current();
264
265         /* Update the time in the open file db. */
266         (void)set_write_time(fsp->file_id, ft.mtime);
267
268         /* Now set on disk - takes care of notify. */
269         (void)smb_set_file_time(fsp->conn, fsp, fsp->fsp_name, &ft, false);
270 }
271
272 void mark_file_modified(files_struct *fsp)
273 {
274         int dosmode;
275
276         if (fsp->modified) {
277                 return;
278         }
279
280         fsp->modified = true;
281
282         if (SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st) != 0) {
283                 return;
284         }
285         trigger_write_time_update(fsp);
286
287         if (fsp->posix_open) {
288                 return;
289         }
290         if (!(lp_store_dos_attributes(SNUM(fsp->conn)) ||
291               MAP_ARCHIVE(fsp->conn))) {
292                 return;
293         }
294
295         dosmode = dos_mode(fsp->conn, fsp->fsp_name);
296         if (IS_DOS_ARCHIVE(dosmode)) {
297                 return;
298         }
299         file_set_dosmode(fsp->conn, fsp->fsp_name,
300                          dosmode | FILE_ATTRIBUTE_ARCHIVE, NULL, false);
301 }
302
303 /****************************************************************************
304  Write to a file.
305 ****************************************************************************/
306
307 ssize_t write_file(struct smb_request *req,
308                         files_struct *fsp,
309                         const char *data,
310                         off_t pos,
311                         size_t n)
312 {
313         struct write_cache *wcp = fsp->wcp;
314         ssize_t total_written = 0;
315         int write_path = -1;
316
317         if (fsp->print_file) {
318                 uint32_t t;
319                 int ret;
320
321                 ret = print_spool_write(fsp, data, n, pos, &t);
322                 if (ret) {
323                         errno = ret;
324                         return -1;
325                 }
326                 return t;
327         }
328
329         if (!fsp->can_write) {
330                 errno = EPERM;
331                 return -1;
332         }
333
334         /*
335          * If this is the first write and we have an exclusive oplock
336          * then setup the write cache.
337          */
338
339         if (!fsp->modified &&
340             EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) &&
341             (wcp == NULL)) {
342                 /*
343                  * Note: no write cache with leases!
344                  * as the handles would have to share the write cache
345                  * that's possible but an improvement for another day...
346                  */
347                 setup_write_cache(fsp, fsp->fsp_name->st.st_ex_size);
348                 wcp = fsp->wcp;
349         }
350
351         mark_file_modified(fsp);
352
353         DO_PROFILE_INC(writecache_total_writes);
354         if (!fsp->oplock_type) {
355                 DO_PROFILE_INC(writecache_non_oplock_writes);
356         }
357
358         /*
359          * If this file is level II oplocked then we need
360          * to grab the shared memory lock and inform all
361          * other files with a level II lock that they need
362          * to flush their read caches. We keep the lock over
363          * the shared memory area whilst doing this.
364          */
365
366         /* This should actually be improved to span the write. */
367         contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
368         contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
369
370         if (wcp && req->unread_bytes) {
371                 /* If we're using receivefile don't
372                  * deal with a write cache.
373                  */
374                 flush_write_cache(fsp, SAMBA_WRITE_FLUSH);
375                 delete_write_cache(fsp);
376                 wcp = NULL;
377         }
378
379         if(!wcp) {
380                 DO_PROFILE_INC(writecache_direct_writes);
381                 total_written = real_write_file(req, fsp, data, pos, n);
382                 return total_written;
383         }
384
385         DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f "
386                  "wcp->data_size=%u\n", fsp_str_dbg(fsp), fsp->fh->fd,
387                  (double)pos, (unsigned int)n, (double)wcp->offset,
388                  (unsigned int)wcp->data_size));
389
390         fsp->fh->pos = pos + n;
391
392         if ((n == 1) && (data[0] == '\0') && (pos > wcp->file_size)) {
393                 int ret;
394
395                 /*
396                  * This is a 1-byte write of a 0 beyond the EOF and
397                  * thus implicitly also beyond the current active
398                  * write cache, the typical file-extending (and
399                  * allocating, but we're using the write cache here)
400                  * write done by Windows. We just have to ftruncate
401                  * the file and rely on posix semantics to return
402                  * zeros for non-written file data that is within the
403                  * file length.
404                  *
405                  * We can not use wcp_file_size_change here because we
406                  * might have an existing write cache, and
407                  * wcp_file_size_change assumes a change to just the
408                  * end of the current write cache.
409                  */
410
411                 wcp->file_size = pos + 1;
412                 ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
413                 if (ret == -1) {
414                         DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f"
415                                  "error %s\n", fsp_str_dbg(fsp),
416                                  (double)wcp->file_size, strerror(errno)));
417                         return -1;
418                 }
419                 return 1;
420         }
421
422
423         /*
424          * If we have active cache and it isn't contiguous then we flush.
425          * NOTE: There is a small problem with running out of disk ....
426          */
427
428         if (wcp->data_size) {
429                 bool cache_flush_needed = False;
430
431                 if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
432       
433                         /* ASCII art.... JRA.
434
435       +--------------+-----
436       | Cached data  | Rest of allocated cache buffer....
437       +--------------+-----
438
439             +-------------------+
440             | Data to write     |
441             +-------------------+
442
443                         */
444
445                         /*
446                          * Start of write overlaps or abutts the existing data.
447                          */
448
449                         size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
450
451                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
452
453                         /*
454                          * Update the current buffer size with the new data.
455                          */
456
457                         if(pos + data_used > wcp->offset + wcp->data_size) {
458                                 wcp->data_size = pos + data_used - wcp->offset;
459                         }
460
461                         /*
462                          * Update the file size if changed.
463                          */
464
465                         if (wcp->offset + wcp->data_size > wcp->file_size) {
466                                 if (wcp_file_size_change(fsp) == -1) {
467                                         return -1;
468                                 }
469                         }
470
471                         /*
472                          * If we used all the data then
473                          * return here.
474                          */
475
476                         if(n == data_used) {
477                                 return n;
478                         } else {
479                                 cache_flush_needed = True;
480                         }
481                         /*
482                          * Move the start of data forward by the amount used,
483                          * cut down the amount left by the same amount.
484                          */
485
486                         data += data_used;
487                         pos += data_used;
488                         n -= data_used;
489
490                         DO_PROFILE_INC(writecache_abutted_writes);
491                         total_written = data_used;
492
493                         write_path = 1;
494
495                 } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
496                                         (pos + n <= wcp->offset + wcp->alloc_size)) {
497
498                         /* ASCII art.... JRA.
499
500                         +---------------+
501                         | Cache buffer  |
502                         +---------------+
503
504             +-------------------+
505             | Data to write     |
506             +-------------------+
507
508                         */
509
510                         /*
511                          * End of write overlaps the existing data.
512                          */
513
514                         size_t data_used = pos + n - wcp->offset;
515
516                         memcpy(wcp->data, data + n - data_used, data_used);
517
518                         /*
519                          * Update the current buffer size with the new data.
520                          */
521
522                         if(pos + n > wcp->offset + wcp->data_size) {
523                                 wcp->data_size = pos + n - wcp->offset;
524                         }
525
526                         /*
527                          * Update the file size if changed.
528                          */
529
530                         if (wcp->offset + wcp->data_size > wcp->file_size) {
531                                 if (wcp_file_size_change(fsp) == -1) {
532                                         return -1;
533                                 }
534                         }
535
536                         /*
537                          * We don't need to move the start of data, but we
538                          * cut down the amount left by the amount used.
539                          */
540
541                         n -= data_used;
542
543                         /*
544                          * We cannot have used all the data here.
545                          */
546
547                         cache_flush_needed = True;
548
549                         DO_PROFILE_INC(writecache_abutted_writes);
550                         total_written = data_used;
551
552                         write_path = 2;
553
554                 } else if ( (pos >= wcp->file_size) && 
555                                         (wcp->offset + wcp->data_size == wcp->file_size) &&
556                                         (pos > wcp->offset + wcp->data_size) && 
557                                         (pos < wcp->offset + wcp->alloc_size) ) {
558
559                         /* ASCII art.... JRA.
560
561                        End of file ---->|
562
563                         +---------------+---------------+
564                         | Cached data   | Cache buffer  |
565                         +---------------+---------------+
566
567                                               +-------------------+
568                                               | Data to write     |
569                                               +-------------------+
570
571                         */
572
573                         /*
574                          * Non-contiguous write part of which fits within
575                          * the cache buffer and is extending the file
576                          * and the cache contents reflect the current
577                          * data up to the current end of the file.
578                          */
579
580                         size_t data_used;
581
582                         if(pos + n <= wcp->offset + wcp->alloc_size) {
583                                 data_used = n;
584                         } else {
585                                 data_used = wcp->offset + wcp->alloc_size - pos;
586                         }
587
588                         /*
589                          * Fill in the non-continuous area with zeros.
590                          */
591
592                         memset(wcp->data + wcp->data_size, '\0',
593                                 pos - (wcp->offset + wcp->data_size) );
594
595                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
596
597                         /*
598                          * Update the current buffer size with the new data.
599                          */
600
601                         if(pos + data_used > wcp->offset + wcp->data_size) {
602                                 wcp->data_size = pos + data_used - wcp->offset;
603                         }
604
605                         /*
606                          * Update the file size if changed.
607                          */
608
609                         if (wcp->offset + wcp->data_size > wcp->file_size) {
610                                 if (wcp_file_size_change(fsp) == -1) {
611                                         return -1;
612                                 }
613                         }
614
615                         /*
616                          * If we used all the data then
617                          * return here.
618                          */
619
620                         if(n == data_used) {
621                                 return n;
622                         } else {
623                                 cache_flush_needed = True;
624                         }
625
626                         /*
627                          * Move the start of data forward by the amount used,
628                          * cut down the amount left by the same amount.
629                          */
630
631                         data += data_used;
632                         pos += data_used;
633                         n -= data_used;
634
635                         DO_PROFILE_INC(writecache_abutted_writes);
636                         total_written = data_used;
637
638                         write_path = 3;
639
640                 } else if ( (pos >= wcp->file_size) &&
641                             (n == 1) &&
642                             (wcp->file_size == wcp->offset + wcp->data_size) &&
643                             (pos < wcp->file_size + wcp->alloc_size)) {
644
645                         /*
646
647                 End of file ---->|
648
649                  +---------------+---------------+
650                  | Cached data   | Cache buffer  |
651                  +---------------+---------------+
652
653                                  |<------- allocated size ---------------->|
654
655                                                          +--------+
656                                                          | 1 Byte |
657                                                          +--------+
658
659                         MS-Office seems to do this a lot to determine if there's enough
660                         space on the filesystem to write a new file.
661
662                         Change to :
663
664                 End of file ---->|
665                                  +-----------------------+--------+
666                                  | Zeroed Cached data    | 1 Byte |
667                                  +-----------------------+--------+
668                         */
669
670                         flush_write_cache(fsp, SAMBA_WRITE_FLUSH);
671                         wcp->offset = wcp->file_size;
672                         wcp->data_size = pos - wcp->file_size + 1;
673                         memset(wcp->data, '\0', wcp->data_size);
674                         memcpy(wcp->data + wcp->data_size-1, data, 1);
675
676                         /*
677                          * Update the file size if changed.
678                          */
679
680                         if (wcp->offset + wcp->data_size > wcp->file_size) {
681                                 if (wcp_file_size_change(fsp) == -1) {
682                                         return -1;
683                                 }
684                         }
685
686                         return n;
687
688                 } else {
689
690                         /* ASCII art..... JRA.
691
692    Case 1).
693
694                         +---------------+---------------+
695                         | Cached data   | Cache buffer  |
696                         +---------------+---------------+
697
698                                                               +-------------------+
699                                                               | Data to write     |
700                                                               +-------------------+
701
702    Case 2).
703
704                            +---------------+---------------+
705                            | Cached data   | Cache buffer  |
706                            +---------------+---------------+
707
708    +-------------------+
709    | Data to write     |
710    +-------------------+
711
712     Case 3).
713
714                            +---------------+---------------+
715                            | Cached data   | Cache buffer  |
716                            +---------------+---------------+
717
718                   +-----------------------------------------------------+
719                   | Data to write                                       |
720                   +-----------------------------------------------------+
721
722                   */
723
724                         /*
725                          * Write is bigger than buffer, or there is no overlap on the
726                          * low or high ends.
727                          */
728
729                         DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
730 len = %u\n",fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
731
732                         /*
733                          * If write would fit in the cache, and is larger than
734                          * the data already in the cache, flush the cache and
735                          * preferentially copy the data new data into it. Otherwise
736                          * just write the data directly.
737                          */
738
739                         if ( n <= wcp->alloc_size && n > wcp->data_size) {
740                                 cache_flush_needed = True;
741                         } else {
742                                 ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
743
744                                 /*
745                                  * If the write overlaps the entire cache, then
746                                  * discard the current contents of the cache.
747                                  * Fix from Rasmus Borup Hansen rbh@math.ku.dk.
748                                  */
749
750                                 if ((pos <= wcp->offset) &&
751                                                 (pos + n >= wcp->offset + wcp->data_size) ) {
752                                         DEBUG(9,("write_file: discarding overwritten write \
753 cache: fd = %d, off=%.0f, size=%u\n", fsp->fh->fd, (double)wcp->offset, (unsigned int)wcp->data_size ));
754                                         wcp->data_size = 0;
755                                 }
756
757                                 DO_PROFILE_INC(writecache_direct_writes);
758                                 if (ret == -1) {
759                                         return ret;
760                                 }
761
762                                 if (pos + ret > wcp->file_size) {
763                                         wcp->file_size = pos + ret;
764                                 }
765
766                                 return ret;
767                         }
768
769                         write_path = 4;
770
771                 }
772
773                 if (cache_flush_needed) {
774                         DEBUG(3,("SAMBA_WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
775 n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
776                                 write_path, fsp->fh->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
777                                 (double)wcp->offset, (unsigned int)wcp->data_size ));
778
779                         flush_write_cache(fsp, SAMBA_WRITE_FLUSH);
780                 }
781         }
782
783         /*
784          * If the write request is bigger than the cache
785          * size, write it all out.
786          */
787
788         if (n > wcp->alloc_size ) {
789                 ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
790                 if (ret == -1) {
791                         return -1;
792                 }
793
794                 if (pos + ret > wcp->file_size) {
795                         wcp->file_size = pos + n;
796                 }
797
798                 DO_PROFILE_INC(writecache_direct_writes);
799                 return total_written + n;
800         }
801
802         /*
803          * If there's any data left, cache it.
804          */
805
806         if (n) {
807                 DO_PROFILE_INC(writecache_cached_writes);
808                 if (wcp->data_size) {
809                         DO_PROFILE_INC(writecache_abutted_writes);
810                 } else {
811                         DO_PROFILE_INC(writecache_init_writes);
812                 }
813
814                 if ((wcp->data_size == 0)
815                     && (pos > wcp->file_size)
816                     && (pos + n <= wcp->file_size + wcp->alloc_size)) {
817                         /*
818                          * This is a write completely beyond the
819                          * current EOF, but within reach of the write
820                          * cache. We expect fill-up writes pretty
821                          * soon, so it does not make sense to start
822                          * the write cache at the current
823                          * offset. These fill-up writes would trigger
824                          * separate pwrites or even unnecessary cache
825                          * flushes because they overlap if this is a
826                          * one-byte allocating write.
827                          */
828                         wcp->offset = wcp->file_size;
829                         wcp->data_size = pos - wcp->file_size;
830                         memset(wcp->data, 0, wcp->data_size);
831                 }
832
833                 memcpy(wcp->data+wcp->data_size, data, n);
834                 if (wcp->data_size == 0) {
835                         wcp->offset = pos;
836                 }
837                 wcp->data_size += n;
838
839                 /*
840                  * Update the file size if changed.
841                  */
842
843                 if (wcp->offset + wcp->data_size > wcp->file_size) {
844                         if (wcp_file_size_change(fsp) == -1) {
845                                 return -1;
846                         }
847                 }
848                 DEBUG(9,("wcp->offset = %.0f wcp->data_size = %u cache return %u\n",
849                         (double)wcp->offset, (unsigned int)wcp->data_size, (unsigned int)n));
850
851                 total_written += n;
852                 return total_written; /* .... that's a write :) */
853         }
854   
855         return total_written;
856 }
857
858 /****************************************************************************
859  Delete the write cache structure.
860 ****************************************************************************/
861
862 void delete_write_cache(files_struct *fsp)
863 {
864         struct write_cache *wcp;
865
866         if(!fsp) {
867                 return;
868         }
869
870         if(!(wcp = fsp->wcp)) {
871                 return;
872         }
873
874         DO_PROFILE_INC(writecache_deallocations);
875         allocated_write_caches--;
876
877         SMB_ASSERT(wcp->data_size == 0);
878
879         SAFE_FREE(wcp->data);
880         SAFE_FREE(fsp->wcp);
881
882         DEBUG(10,("delete_write_cache: File %s deleted write cache\n",
883                   fsp_str_dbg(fsp)));
884 }
885
886 /****************************************************************************
887  Setup the write cache structure.
888 ****************************************************************************/
889
890 static bool setup_write_cache(files_struct *fsp, off_t file_size)
891 {
892         ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn));
893         struct write_cache *wcp;
894
895         if (allocated_write_caches >= MAX_WRITE_CACHES) {
896                 return False;
897         }
898
899         if(alloc_size == 0 || fsp->wcp) {
900                 return False;
901         }
902
903         if((wcp = SMB_MALLOC_P(struct write_cache)) == NULL) {
904                 DEBUG(0,("setup_write_cache: malloc fail.\n"));
905                 return False;
906         }
907
908         wcp->file_size = file_size;
909         wcp->offset = 0;
910         wcp->alloc_size = alloc_size;
911         wcp->data_size = 0;
912         if((wcp->data = (char *)SMB_MALLOC(wcp->alloc_size)) == NULL) {
913                 DEBUG(0,("setup_write_cache: malloc fail for buffer size %u.\n",
914                         (unsigned int)wcp->alloc_size ));
915                 SAFE_FREE(wcp);
916                 return False;
917         }
918
919         memset(wcp->data, '\0', wcp->alloc_size );
920
921         fsp->wcp = wcp;
922         DO_PROFILE_INC(writecache_allocations);
923         allocated_write_caches++;
924
925         DEBUG(10,("setup_write_cache: File %s allocated write cache size %lu\n",
926                   fsp_str_dbg(fsp), (unsigned long)wcp->alloc_size));
927
928         return True;
929 }
930
931 /****************************************************************************
932  Cope with a size change.
933 ****************************************************************************/
934
935 void set_filelen_write_cache(files_struct *fsp, off_t file_size)
936 {
937         if(fsp->wcp) {
938                 /* The cache *must* have been flushed before we do this. */
939                 if (fsp->wcp->data_size != 0) {
940                         char *msg;
941                         if (asprintf(&msg, "set_filelen_write_cache: size change "
942                                  "on file %s with write cache size = %lu\n",
943                                  fsp->fsp_name->base_name,
944                                  (unsigned long)fsp->wcp->data_size) != -1) {
945                                 smb_panic(msg);
946                         } else {
947                                 smb_panic("set_filelen_write_cache");
948                         }
949                 }
950                 fsp->wcp->file_size = file_size;
951         }
952 }
953
954 /*******************************************************************
955  Flush a write cache struct to disk.
956 ********************************************************************/
957
958 ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
959 {
960         struct write_cache *wcp = fsp->wcp;
961         size_t data_size;
962         ssize_t ret;
963
964         if(!wcp || !wcp->data_size) {
965                 return 0;
966         }
967
968         data_size = wcp->data_size;
969         wcp->data_size = 0;
970
971         switch (reason) {
972         case SAMBA_SEEK_FLUSH:
973                 DO_PROFILE_INC(writecache_flush_reason_seek);
974                 break;
975         case SAMBA_READ_FLUSH:
976                 DO_PROFILE_INC(writecache_flush_reason_read);
977                 break;
978         case SAMBA_WRITE_FLUSH:
979                 DO_PROFILE_INC(writecache_flush_reason_write);;
980                 break;
981         case SAMBA_READRAW_FLUSH:
982                 DO_PROFILE_INC(writecache_flush_reason_readraw);
983                 break;
984         case SAMBA_OPLOCK_RELEASE_FLUSH:
985                 DO_PROFILE_INC(writecache_flush_reason_oplock);
986                 break;
987         case SAMBA_CLOSE_FLUSH:
988                 DO_PROFILE_INC(writecache_flush_reason_close);
989                 break;
990         case SAMBA_SYNC_FLUSH:
991                 DO_PROFILE_INC(writecache_flush_reason_sync);
992                 break;
993         case SAMBA_SIZECHANGE_FLUSH:
994                 DO_PROFILE_INC(writecache_flush_reason_sizechange);
995                 break;
996         default:
997                 break;
998         }
999
1000         DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
1001                 fsp->fh->fd, (double)wcp->offset, (unsigned int)data_size));
1002
1003         if(data_size == wcp->alloc_size) {
1004                 DO_PROFILE_INC(writecache_perfect_writes);
1005         }
1006
1007         ret = real_write_file(NULL, fsp, wcp->data, wcp->offset, data_size);
1008
1009         /*
1010          * Ensure file size if kept up to date if write extends file.
1011          */
1012
1013         if ((ret != -1) && (wcp->offset + ret > wcp->file_size)) {
1014                 wcp->file_size = wcp->offset + ret;
1015         }
1016
1017         return ret;
1018 }
1019
1020 /*******************************************************************
1021 sync a file
1022 ********************************************************************/
1023
1024 NTSTATUS sync_file(connection_struct *conn, files_struct *fsp, bool write_through)
1025 {
1026         if (fsp->fh->fd == -1)
1027                 return NT_STATUS_INVALID_HANDLE;
1028
1029         if (lp_strict_sync(SNUM(conn)) &&
1030             (lp_sync_always(SNUM(conn)) || write_through)) {
1031                 int ret = flush_write_cache(fsp, SAMBA_SYNC_FLUSH);
1032                 if (ret == -1) {
1033                         return map_nt_error_from_unix(errno);
1034                 }
1035                 ret = SMB_VFS_FSYNC(fsp);
1036                 if (ret == -1) {
1037                         return map_nt_error_from_unix(errno);
1038                 }
1039         }
1040         return NT_STATUS_OK;
1041 }
1042
1043 /************************************************************
1044  Perform a stat whether a valid fd or not.
1045 ************************************************************/
1046
1047 int fsp_stat(files_struct *fsp)
1048 {
1049         if (fsp->fh->fd == -1) {
1050                 if (fsp->posix_open) {
1051                         return SMB_VFS_LSTAT(fsp->conn, fsp->fsp_name);
1052                 } else {
1053                         return SMB_VFS_STAT(fsp->conn, fsp->fsp_name);
1054                 }
1055         } else {
1056                 return SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st);
1057         }
1058 }