s3: include smbd/smbd.h where needed.
[abartlet/samba.git/.git] / source3 / smbd / fileio.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 1.9.
4    read/write to a files_struct
5    Copyright (C) Andrew Tridgell 1992-1998
6    Copyright (C) Jeremy Allison 2000-2002. - write cache.
7    
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include "includes.h"
23 #include "printing.h"
24 #include "smbd/smbd.h"
25 #include "smbd/globals.h"
26
27 static bool setup_write_cache(files_struct *, SMB_OFF_T);
28
29 /****************************************************************************
30  Read from write cache if we can.
31 ****************************************************************************/
32
33 static bool read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
34 {
35         write_cache *wcp = fsp->wcp;
36
37         if(!wcp) {
38                 return False;
39         }
40
41         if( n > wcp->data_size || pos < wcp->offset || pos + n > wcp->offset + wcp->data_size) {
42                 return False;
43         }
44
45         memcpy(data, wcp->data + (pos - wcp->offset), n);
46
47         DO_PROFILE_INC(writecache_read_hits);
48
49         return True;
50 }
51
52 /****************************************************************************
53  Read from a file.
54 ****************************************************************************/
55
56 ssize_t read_file(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
57 {
58         ssize_t ret=0,readret;
59
60         /* you can't read from print files */
61         if (fsp->print_file) {
62                 errno = EBADF;
63                 return -1;
64         }
65
66         /*
67          * Serve from write cache if we can.
68          */
69
70         if(read_from_write_cache(fsp, data, pos, n)) {
71                 fsp->fh->pos = pos + n;
72                 fsp->fh->position_information = fsp->fh->pos;
73                 return n;
74         }
75
76         flush_write_cache(fsp, READ_FLUSH);
77
78         fsp->fh->pos = pos;
79
80         if (n > 0) {
81 #ifdef DMF_FIX
82                 int numretries = 3;
83 tryagain:
84                 readret = SMB_VFS_PREAD(fsp,data,n,pos);
85
86                 if (readret == -1) {
87                         if ((errno == EAGAIN) && numretries) {
88                                 DEBUG(3,("read_file EAGAIN retry in 10 seconds\n"));
89                                 (void)sleep(10);
90                                 --numretries;
91                                 goto tryagain;
92                         }
93                         return -1;
94                 }
95 #else /* NO DMF fix. */
96                 readret = SMB_VFS_PREAD(fsp,data,n,pos);
97
98                 if (readret == -1) {
99                         return -1;
100                 }
101 #endif
102                 if (readret > 0) {
103                         ret += readret;
104                 }
105         }
106
107         DEBUG(10,("read_file (%s): pos = %.0f, size = %lu, returned %lu\n",
108                   fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
109
110         fsp->fh->pos += ret;
111         fsp->fh->position_information = fsp->fh->pos;
112
113         return(ret);
114 }
115
116 /****************************************************************************
117  *Really* write to a file.
118 ****************************************************************************/
119
120 static ssize_t real_write_file(struct smb_request *req,
121                                 files_struct *fsp,
122                                 const char *data,
123                                 SMB_OFF_T pos,
124                                 size_t n)
125 {
126         ssize_t ret;
127
128         if (pos == -1) {
129                 ret = vfs_write_data(req, fsp, data, n);
130         } else {
131                 fsp->fh->pos = pos;
132                 if (pos && lp_strict_allocate(SNUM(fsp->conn) &&
133                                 !fsp->is_sparse)) {
134                         if (vfs_fill_sparse(fsp, pos) == -1) {
135                                 return -1;
136                         }
137                 }
138                 ret = vfs_pwrite_data(req, fsp, data, n, pos);
139         }
140
141         DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
142                   fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
143
144         if (ret != -1) {
145                 fsp->fh->pos += ret;
146
147 /* Yes - this is correct - writes don't update this. JRA. */
148 /* Found by Samba4 tests. */
149 #if 0
150                 fsp->position_information = fsp->pos;
151 #endif
152         }
153
154         return ret;
155 }
156
157 /****************************************************************************
158  File size cache change.
159  Updates size on disk but doesn't flush the cache.
160 ****************************************************************************/
161
162 static int wcp_file_size_change(files_struct *fsp)
163 {
164         int ret;
165         write_cache *wcp = fsp->wcp;
166
167         wcp->file_size = wcp->offset + wcp->data_size;
168         ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
169         if (ret == -1) {
170                 DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f "
171                          "error %s\n", fsp_str_dbg(fsp),
172                          (double)wcp->file_size, strerror(errno)));
173         }
174         return ret;
175 }
176
177 void update_write_time_handler(struct event_context *ctx,
178                                       struct timed_event *te,
179                                       struct timeval now,
180                                       void *private_data)
181 {
182         files_struct *fsp = (files_struct *)private_data;
183
184         DEBUG(5, ("Update write time on %s\n", fsp_str_dbg(fsp)));
185
186         /* change the write time in the open file db. */
187         (void)set_write_time(fsp->file_id, timespec_current());
188
189         /* And notify. */
190         notify_fname(fsp->conn, NOTIFY_ACTION_MODIFIED,
191                      FILE_NOTIFY_CHANGE_LAST_WRITE, fsp->fsp_name->base_name);
192
193         /* Remove the timed event handler. */
194         TALLOC_FREE(fsp->update_write_time_event);
195 }
196
197 /*********************************************************
198  Schedule a write time update for WRITE_TIME_UPDATE_USEC_DELAY
199  in the future.
200 *********************************************************/
201
202 void trigger_write_time_update(struct files_struct *fsp)
203 {
204         int delay;
205
206         if (fsp->posix_open) {
207                 /* Don't use delayed writes on POSIX files. */
208                 return;
209         }
210
211         if (fsp->write_time_forced) {
212                 /* No point - "sticky" write times
213                  * in effect.
214                  */
215                 return;
216         }
217
218         /* We need to remember someone did a write
219          * and update to current time on close. */
220
221         fsp->update_write_time_on_close = true;
222
223         if (fsp->update_write_time_triggered) {
224                 /*
225                  * We only update the write time after 2 seconds
226                  * on the first normal write. After that
227                  * no other writes affect this until close.
228                  */
229                 return;
230         }
231         fsp->update_write_time_triggered = true;
232
233         delay = lp_parm_int(SNUM(fsp->conn),
234                             "smbd", "writetimeupdatedelay",
235                             WRITE_TIME_UPDATE_USEC_DELAY);
236
237         DEBUG(5, ("Update write time %d usec later on %s\n",
238                   delay, fsp_str_dbg(fsp)));
239
240         /* trigger the update 2 seconds later */
241         fsp->update_write_time_event =
242                 event_add_timed(smbd_event_context(), NULL,
243                                 timeval_current_ofs(0, delay),
244                                 update_write_time_handler, fsp);
245 }
246
247 void trigger_write_time_update_immediate(struct files_struct *fsp)
248 {
249         struct smb_file_time ft;
250
251         if (fsp->posix_open) {
252                 /* Don't use delayed writes on POSIX files. */
253                 return;
254         }
255
256         if (fsp->write_time_forced) {
257                 /*
258                  * No point - "sticky" write times
259                  * in effect.
260                  */
261                 return;
262         }
263
264         TALLOC_FREE(fsp->update_write_time_event);
265         DEBUG(5, ("Update write time immediate on %s\n",
266                   fsp_str_dbg(fsp)));
267
268         /* After an immediate update, reset the trigger. */
269         fsp->update_write_time_triggered = true;
270         fsp->update_write_time_on_close = false;
271
272         ZERO_STRUCT(ft);
273         ft.mtime = timespec_current();
274
275         /* Update the time in the open file db. */
276         (void)set_write_time(fsp->file_id, ft.mtime);
277
278         /* Now set on disk - takes care of notify. */
279         (void)smb_set_file_time(fsp->conn, fsp, fsp->fsp_name, &ft, false);
280 }
281
282 /****************************************************************************
283  Write to a file.
284 ****************************************************************************/
285
286 ssize_t write_file(struct smb_request *req,
287                         files_struct *fsp,
288                         const char *data,
289                         SMB_OFF_T pos,
290                         size_t n)
291 {
292         write_cache *wcp = fsp->wcp;
293         ssize_t total_written = 0;
294         int write_path = -1;
295
296         if (fsp->print_file) {
297                 uint32_t t;
298                 int ret;
299
300                 ret = print_spool_write(fsp, data, n, pos, &t);
301                 if (ret) {
302                         errno = ret;
303                         return -1;
304                 }
305                 return t;
306         }
307
308         if (!fsp->can_write) {
309                 errno = EPERM;
310                 return -1;
311         }
312
313         if (!fsp->modified) {
314                 fsp->modified = True;
315
316                 if (SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st) == 0) {
317                         trigger_write_time_update(fsp);
318                         if (!fsp->posix_open &&
319                                         (lp_store_dos_attributes(SNUM(fsp->conn)) ||
320                                         MAP_ARCHIVE(fsp->conn))) {
321                                 int dosmode = dos_mode(fsp->conn, fsp->fsp_name);
322                                 if (!IS_DOS_ARCHIVE(dosmode)) {
323                                         file_set_dosmode(fsp->conn, fsp->fsp_name,
324                                                  dosmode | aARCH, NULL, false);
325                                 }
326                         }
327
328                         /*
329                          * If this is the first write and we have an exclusive oplock then setup
330                          * the write cache.
331                          */
332
333                         if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && !wcp) {
334                                 setup_write_cache(fsp,
335                                                  fsp->fsp_name->st.st_ex_size);
336                                 wcp = fsp->wcp;
337                         }
338                 }
339         }
340
341 #ifdef WITH_PROFILE
342         DO_PROFILE_INC(writecache_total_writes);
343         if (!fsp->oplock_type) {
344                 DO_PROFILE_INC(writecache_non_oplock_writes);
345         }
346 #endif
347
348         /*
349          * If this file is level II oplocked then we need
350          * to grab the shared memory lock and inform all
351          * other files with a level II lock that they need
352          * to flush their read caches. We keep the lock over
353          * the shared memory area whilst doing this.
354          */
355
356         /* This should actually be improved to span the write. */
357         contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
358         contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
359
360 #ifdef WITH_PROFILE
361         if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
362                 DEBUG(3,("WRITECACHE: initwrites=%u abutted=%u total=%u \
363 nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
364                         profile_p->writecache_init_writes,
365                         profile_p->writecache_abutted_writes,
366                         profile_p->writecache_total_writes,
367                         profile_p->writecache_non_oplock_writes,
368                         profile_p->writecache_allocated_write_caches,
369                         profile_p->writecache_num_write_caches,
370                         profile_p->writecache_direct_writes,
371                         profile_p->writecache_num_perfect_writes,
372                         profile_p->writecache_read_hits ));
373
374                 DEBUG(3,("WRITECACHE: Flushes SEEK=%d, READ=%d, WRITE=%d, READRAW=%d, OPLOCK=%d, CLOSE=%d, SYNC=%d\n",
375                         profile_p->writecache_flushed_writes[SEEK_FLUSH],
376                         profile_p->writecache_flushed_writes[READ_FLUSH],
377                         profile_p->writecache_flushed_writes[WRITE_FLUSH],
378                         profile_p->writecache_flushed_writes[READRAW_FLUSH],
379                         profile_p->writecache_flushed_writes[OPLOCK_RELEASE_FLUSH],
380                         profile_p->writecache_flushed_writes[CLOSE_FLUSH],
381                         profile_p->writecache_flushed_writes[SYNC_FLUSH] ));
382         }
383 #endif
384
385         if (wcp && req->unread_bytes) {
386                 /* If we're using receivefile don't
387                  * deal with a write cache.
388                  */
389                 flush_write_cache(fsp, WRITE_FLUSH);
390                 delete_write_cache(fsp);
391                 wcp = NULL;
392         }
393
394         if(!wcp) {
395                 DO_PROFILE_INC(writecache_direct_writes);
396                 total_written = real_write_file(req, fsp, data, pos, n);
397                 return total_written;
398         }
399
400         DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f "
401                  "wcp->data_size=%u\n", fsp_str_dbg(fsp), fsp->fh->fd,
402                  (double)pos, (unsigned int)n, (double)wcp->offset,
403                  (unsigned int)wcp->data_size));
404
405         fsp->fh->pos = pos + n;
406
407         if ((n == 1) && (data[0] == '\0') && (pos > wcp->file_size)) {
408                 int ret;
409
410                 /*
411                  * This is a 1-byte write of a 0 beyond the EOF and
412                  * thus implicitly also beyond the current active
413                  * write cache, the typical file-extending (and
414                  * allocating, but we're using the write cache here)
415                  * write done by Windows. We just have to ftruncate
416                  * the file and rely on posix semantics to return
417                  * zeros for non-written file data that is within the
418                  * file length.
419                  *
420                  * We can not use wcp_file_size_change here because we
421                  * might have an existing write cache, and
422                  * wcp_file_size_change assumes a change to just the
423                  * end of the current write cache.
424                  */
425
426                 wcp->file_size = pos + 1;
427                 ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
428                 if (ret == -1) {
429                         DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f"
430                                  "error %s\n", fsp_str_dbg(fsp),
431                                  (double)wcp->file_size, strerror(errno)));
432                         return -1;
433                 }
434                 return 1;
435         }
436
437
438         /*
439          * If we have active cache and it isn't contiguous then we flush.
440          * NOTE: There is a small problem with running out of disk ....
441          */
442
443         if (wcp->data_size) {
444                 bool cache_flush_needed = False;
445
446                 if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
447       
448                         /* ASCII art.... JRA.
449
450       +--------------+-----
451       | Cached data  | Rest of allocated cache buffer....
452       +--------------+-----
453
454             +-------------------+
455             | Data to write     |
456             +-------------------+
457
458                         */
459
460                         /*
461                          * Start of write overlaps or abutts the existing data.
462                          */
463
464                         size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
465
466                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
467
468                         /*
469                          * Update the current buffer size with the new data.
470                          */
471
472                         if(pos + data_used > wcp->offset + wcp->data_size) {
473                                 wcp->data_size = pos + data_used - wcp->offset;
474                         }
475
476                         /*
477                          * Update the file size if changed.
478                          */
479
480                         if (wcp->offset + wcp->data_size > wcp->file_size) {
481                                 if (wcp_file_size_change(fsp) == -1) {
482                                         return -1;
483                                 }
484                         }
485
486                         /*
487                          * If we used all the data then
488                          * return here.
489                          */
490
491                         if(n == data_used) {
492                                 return n;
493                         } else {
494                                 cache_flush_needed = True;
495                         }
496                         /*
497                          * Move the start of data forward by the amount used,
498                          * cut down the amount left by the same amount.
499                          */
500
501                         data += data_used;
502                         pos += data_used;
503                         n -= data_used;
504
505                         DO_PROFILE_INC(writecache_abutted_writes);
506                         total_written = data_used;
507
508                         write_path = 1;
509
510                 } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
511                                         (pos + n <= wcp->offset + wcp->alloc_size)) {
512
513                         /* ASCII art.... JRA.
514
515                         +---------------+
516                         | Cache buffer  |
517                         +---------------+
518
519             +-------------------+
520             | Data to write     |
521             +-------------------+
522
523                         */
524
525                         /*
526                          * End of write overlaps the existing data.
527                          */
528
529                         size_t data_used = pos + n - wcp->offset;
530
531                         memcpy(wcp->data, data + n - data_used, data_used);
532
533                         /*
534                          * Update the current buffer size with the new data.
535                          */
536
537                         if(pos + n > wcp->offset + wcp->data_size) {
538                                 wcp->data_size = pos + n - wcp->offset;
539                         }
540
541                         /*
542                          * Update the file size if changed.
543                          */
544
545                         if (wcp->offset + wcp->data_size > wcp->file_size) {
546                                 if (wcp_file_size_change(fsp) == -1) {
547                                         return -1;
548                                 }
549                         }
550
551                         /*
552                          * We don't need to move the start of data, but we
553                          * cut down the amount left by the amount used.
554                          */
555
556                         n -= data_used;
557
558                         /*
559                          * We cannot have used all the data here.
560                          */
561
562                         cache_flush_needed = True;
563
564                         DO_PROFILE_INC(writecache_abutted_writes);
565                         total_written = data_used;
566
567                         write_path = 2;
568
569                 } else if ( (pos >= wcp->file_size) && 
570                                         (wcp->offset + wcp->data_size == wcp->file_size) &&
571                                         (pos > wcp->offset + wcp->data_size) && 
572                                         (pos < wcp->offset + wcp->alloc_size) ) {
573
574                         /* ASCII art.... JRA.
575
576                        End of file ---->|
577
578                         +---------------+---------------+
579                         | Cached data   | Cache buffer  |
580                         +---------------+---------------+
581
582                                               +-------------------+
583                                               | Data to write     |
584                                               +-------------------+
585
586                         */
587
588                         /*
589                          * Non-contiguous write part of which fits within
590                          * the cache buffer and is extending the file
591                          * and the cache contents reflect the current
592                          * data up to the current end of the file.
593                          */
594
595                         size_t data_used;
596
597                         if(pos + n <= wcp->offset + wcp->alloc_size) {
598                                 data_used = n;
599                         } else {
600                                 data_used = wcp->offset + wcp->alloc_size - pos;
601                         }
602
603                         /*
604                          * Fill in the non-continuous area with zeros.
605                          */
606
607                         memset(wcp->data + wcp->data_size, '\0',
608                                 pos - (wcp->offset + wcp->data_size) );
609
610                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
611
612                         /*
613                          * Update the current buffer size with the new data.
614                          */
615
616                         if(pos + data_used > wcp->offset + wcp->data_size) {
617                                 wcp->data_size = pos + data_used - wcp->offset;
618                         }
619
620                         /*
621                          * Update the file size if changed.
622                          */
623
624                         if (wcp->offset + wcp->data_size > wcp->file_size) {
625                                 if (wcp_file_size_change(fsp) == -1) {
626                                         return -1;
627                                 }
628                         }
629
630                         /*
631                          * If we used all the data then
632                          * return here.
633                          */
634
635                         if(n == data_used) {
636                                 return n;
637                         } else {
638                                 cache_flush_needed = True;
639                         }
640
641                         /*
642                          * Move the start of data forward by the amount used,
643                          * cut down the amount left by the same amount.
644                          */
645
646                         data += data_used;
647                         pos += data_used;
648                         n -= data_used;
649
650                         DO_PROFILE_INC(writecache_abutted_writes);
651                         total_written = data_used;
652
653                         write_path = 3;
654
655                 } else if ( (pos >= wcp->file_size) &&
656                             (n == 1) &&
657                             (wcp->file_size == wcp->offset + wcp->data_size) &&
658                             (pos < wcp->file_size + wcp->alloc_size)) {
659
660                         /*
661
662                 End of file ---->|
663
664                  +---------------+---------------+
665                  | Cached data   | Cache buffer  |
666                  +---------------+---------------+
667
668                                  |<------- allocated size ---------------->|
669
670                                                          +--------+
671                                                          | 1 Byte |
672                                                          +--------+
673
674                         MS-Office seems to do this a lot to determine if there's enough
675                         space on the filesystem to write a new file.
676
677                         Change to :
678
679                 End of file ---->|
680                                  +-----------------------+--------+
681                                  | Zeroed Cached data    | 1 Byte |
682                                  +-----------------------+--------+
683                         */
684
685                         flush_write_cache(fsp, WRITE_FLUSH);
686                         wcp->offset = wcp->file_size;
687                         wcp->data_size = pos - wcp->file_size + 1;
688                         memset(wcp->data, '\0', wcp->data_size);
689                         memcpy(wcp->data + wcp->data_size-1, data, 1);
690
691                         /*
692                          * Update the file size if changed.
693                          */
694
695                         if (wcp->offset + wcp->data_size > wcp->file_size) {
696                                 if (wcp_file_size_change(fsp) == -1) {
697                                         return -1;
698                                 }
699                         }
700
701                         return n;
702
703                 } else {
704
705                         /* ASCII art..... JRA.
706
707    Case 1).
708
709                         +---------------+---------------+
710                         | Cached data   | Cache buffer  |
711                         +---------------+---------------+
712
713                                                               +-------------------+
714                                                               | Data to write     |
715                                                               +-------------------+
716
717    Case 2).
718
719                            +---------------+---------------+
720                            | Cached data   | Cache buffer  |
721                            +---------------+---------------+
722
723    +-------------------+
724    | Data to write     |
725    +-------------------+
726
727     Case 3).
728
729                            +---------------+---------------+
730                            | Cached data   | Cache buffer  |
731                            +---------------+---------------+
732
733                   +-----------------------------------------------------+
734                   | Data to write                                       |
735                   +-----------------------------------------------------+
736
737                   */
738
739                         /*
740                          * Write is bigger than buffer, or there is no overlap on the
741                          * low or high ends.
742                          */
743
744                         DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
745 len = %u\n",fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
746
747                         /*
748                          * If write would fit in the cache, and is larger than
749                          * the data already in the cache, flush the cache and
750                          * preferentially copy the data new data into it. Otherwise
751                          * just write the data directly.
752                          */
753
754                         if ( n <= wcp->alloc_size && n > wcp->data_size) {
755                                 cache_flush_needed = True;
756                         } else {
757                                 ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
758
759                                 /*
760                                  * If the write overlaps the entire cache, then
761                                  * discard the current contents of the cache.
762                                  * Fix from Rasmus Borup Hansen rbh@math.ku.dk.
763                                  */
764
765                                 if ((pos <= wcp->offset) &&
766                                                 (pos + n >= wcp->offset + wcp->data_size) ) {
767                                         DEBUG(9,("write_file: discarding overwritten write \
768 cache: fd = %d, off=%.0f, size=%u\n", fsp->fh->fd, (double)wcp->offset, (unsigned int)wcp->data_size ));
769                                         wcp->data_size = 0;
770                                 }
771
772                                 DO_PROFILE_INC(writecache_direct_writes);
773                                 if (ret == -1) {
774                                         return ret;
775                                 }
776
777                                 if (pos + ret > wcp->file_size) {
778                                         wcp->file_size = pos + ret;
779                                 }
780
781                                 return ret;
782                         }
783
784                         write_path = 4;
785
786                 }
787
788                 if (cache_flush_needed) {
789                         DEBUG(3,("WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
790 n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
791                                 write_path, fsp->fh->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
792                                 (double)wcp->offset, (unsigned int)wcp->data_size ));
793
794                         flush_write_cache(fsp, WRITE_FLUSH);
795                 }
796         }
797
798         /*
799          * If the write request is bigger than the cache
800          * size, write it all out.
801          */
802
803         if (n > wcp->alloc_size ) {
804                 ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
805                 if (ret == -1) {
806                         return -1;
807                 }
808
809                 if (pos + ret > wcp->file_size) {
810                         wcp->file_size = pos + n;
811                 }
812
813                 DO_PROFILE_INC(writecache_direct_writes);
814                 return total_written + n;
815         }
816
817         /*
818          * If there's any data left, cache it.
819          */
820
821         if (n) {
822 #ifdef WITH_PROFILE
823                 if (wcp->data_size) {
824                         DO_PROFILE_INC(writecache_abutted_writes);
825                 } else {
826                         DO_PROFILE_INC(writecache_init_writes);
827                 }
828 #endif
829
830                 if ((wcp->data_size == 0)
831                     && (pos > wcp->file_size)
832                     && (pos + n <= wcp->file_size + wcp->alloc_size)) {
833                         /*
834                          * This is a write completely beyond the
835                          * current EOF, but within reach of the write
836                          * cache. We expect fill-up writes pretty
837                          * soon, so it does not make sense to start
838                          * the write cache at the current
839                          * offset. These fill-up writes would trigger
840                          * separate pwrites or even unnecessary cache
841                          * flushes because they overlap if this is a
842                          * one-byte allocating write.
843                          */
844                         wcp->offset = wcp->file_size;
845                         wcp->data_size = pos - wcp->file_size;
846                         memset(wcp->data, 0, wcp->data_size);
847                 }
848
849                 memcpy(wcp->data+wcp->data_size, data, n);
850                 if (wcp->data_size == 0) {
851                         wcp->offset = pos;
852                         DO_PROFILE_INC(writecache_num_write_caches);
853                 }
854                 wcp->data_size += n;
855
856                 /*
857                  * Update the file size if changed.
858                  */
859
860                 if (wcp->offset + wcp->data_size > wcp->file_size) {
861                         if (wcp_file_size_change(fsp) == -1) {
862                                 return -1;
863                         }
864                 }
865                 DEBUG(9,("wcp->offset = %.0f wcp->data_size = %u cache return %u\n",
866                         (double)wcp->offset, (unsigned int)wcp->data_size, (unsigned int)n));
867
868                 total_written += n;
869                 return total_written; /* .... that's a write :) */
870         }
871   
872         return total_written;
873 }
874
875 /****************************************************************************
876  Delete the write cache structure.
877 ****************************************************************************/
878
879 void delete_write_cache(files_struct *fsp)
880 {
881         write_cache *wcp;
882
883         if(!fsp) {
884                 return;
885         }
886
887         if(!(wcp = fsp->wcp)) {
888                 return;
889         }
890
891         DO_PROFILE_DEC(writecache_allocated_write_caches);
892         allocated_write_caches--;
893
894         SMB_ASSERT(wcp->data_size == 0);
895
896         SAFE_FREE(wcp->data);
897         SAFE_FREE(fsp->wcp);
898
899         DEBUG(10,("delete_write_cache: File %s deleted write cache\n",
900                   fsp_str_dbg(fsp)));
901 }
902
903 /****************************************************************************
904  Setup the write cache structure.
905 ****************************************************************************/
906
907 static bool setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
908 {
909         ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn));
910         write_cache *wcp;
911
912         if (allocated_write_caches >= MAX_WRITE_CACHES) {
913                 return False;
914         }
915
916         if(alloc_size == 0 || fsp->wcp) {
917                 return False;
918         }
919
920         if((wcp = SMB_MALLOC_P(write_cache)) == NULL) {
921                 DEBUG(0,("setup_write_cache: malloc fail.\n"));
922                 return False;
923         }
924
925         wcp->file_size = file_size;
926         wcp->offset = 0;
927         wcp->alloc_size = alloc_size;
928         wcp->data_size = 0;
929         if((wcp->data = (char *)SMB_MALLOC(wcp->alloc_size)) == NULL) {
930                 DEBUG(0,("setup_write_cache: malloc fail for buffer size %u.\n",
931                         (unsigned int)wcp->alloc_size ));
932                 SAFE_FREE(wcp);
933                 return False;
934         }
935
936         memset(wcp->data, '\0', wcp->alloc_size );
937
938         fsp->wcp = wcp;
939         DO_PROFILE_INC(writecache_allocated_write_caches);
940         allocated_write_caches++;
941
942         DEBUG(10,("setup_write_cache: File %s allocated write cache size %lu\n",
943                   fsp_str_dbg(fsp), (unsigned long)wcp->alloc_size));
944
945         return True;
946 }
947
948 /****************************************************************************
949  Cope with a size change.
950 ****************************************************************************/
951
952 void set_filelen_write_cache(files_struct *fsp, SMB_OFF_T file_size)
953 {
954         if(fsp->wcp) {
955                 /* The cache *must* have been flushed before we do this. */
956                 if (fsp->wcp->data_size != 0) {
957                         char *msg;
958                         if (asprintf(&msg, "set_filelen_write_cache: size change "
959                                  "on file %s with write cache size = %lu\n",
960                                  fsp->fsp_name->base_name,
961                                  (unsigned long)fsp->wcp->data_size) != -1) {
962                                 smb_panic(msg);
963                         } else {
964                                 smb_panic("set_filelen_write_cache");
965                         }
966                 }
967                 fsp->wcp->file_size = file_size;
968         }
969 }
970
971 /*******************************************************************
972  Flush a write cache struct to disk.
973 ********************************************************************/
974
975 ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
976 {
977         write_cache *wcp = fsp->wcp;
978         size_t data_size;
979         ssize_t ret;
980
981         if(!wcp || !wcp->data_size) {
982                 return 0;
983         }
984
985         data_size = wcp->data_size;
986         wcp->data_size = 0;
987
988         DO_PROFILE_DEC_INC(writecache_num_write_caches,writecache_flushed_writes[reason]);
989
990         DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
991                 fsp->fh->fd, (double)wcp->offset, (unsigned int)data_size));
992
993 #ifdef WITH_PROFILE
994         if(data_size == wcp->alloc_size) {
995                 DO_PROFILE_INC(writecache_num_perfect_writes);
996         }
997 #endif
998
999         ret = real_write_file(NULL, fsp, wcp->data, wcp->offset, data_size);
1000
1001         /*
1002          * Ensure file size if kept up to date if write extends file.
1003          */
1004
1005         if ((ret != -1) && (wcp->offset + ret > wcp->file_size)) {
1006                 wcp->file_size = wcp->offset + ret;
1007         }
1008
1009         return ret;
1010 }
1011
1012 /*******************************************************************
1013 sync a file
1014 ********************************************************************/
1015
1016 NTSTATUS sync_file(connection_struct *conn, files_struct *fsp, bool write_through)
1017 {
1018         if (fsp->fh->fd == -1)
1019                 return NT_STATUS_INVALID_HANDLE;
1020
1021         if (lp_strict_sync(SNUM(conn)) &&
1022             (lp_syncalways(SNUM(conn)) || write_through)) {
1023                 int ret = flush_write_cache(fsp, SYNC_FLUSH);
1024                 if (ret == -1) {
1025                         return map_nt_error_from_unix(errno);
1026                 }
1027                 ret = SMB_VFS_FSYNC(fsp);
1028                 if (ret == -1) {
1029                         return map_nt_error_from_unix(errno);
1030                 }
1031         }
1032         return NT_STATUS_OK;
1033 }
1034
1035 /************************************************************
1036  Perform a stat whether a valid fd or not.
1037 ************************************************************/
1038
1039 int fsp_stat(files_struct *fsp)
1040 {
1041         if (fsp->fh->fd == -1) {
1042                 if (fsp->posix_open) {
1043                         return SMB_VFS_LSTAT(fsp->conn, fsp->fsp_name);
1044                 } else {
1045                         return SMB_VFS_STAT(fsp->conn, fsp->fsp_name);
1046                 }
1047         } else {
1048                 return SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st);
1049         }
1050 }