s3-vfs: Use the system. namespace for fake ACLs
[kai/samba.git] / source3 / smbd / fileio.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 1.9.
4    read/write to a files_struct
5    Copyright (C) Andrew Tridgell 1992-1998
6    Copyright (C) Jeremy Allison 2000-2002. - write cache.
7    
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include "includes.h"
23 #include "printing.h"
24 #include "smbd/smbd.h"
25 #include "smbd/globals.h"
26 #include "smbprofile.h"
27
/*
 * Per-open-file write cache state, reached through fsp->wcp.
 * The cache holds one contiguous run of pending file data.
 */
28 struct write_cache {
        /*
         * File size as tracked by the cache. wcp_file_size_change() sets it
         * to offset + data_size and ftruncates the file to that length.
         */
29         off_t file_size;
        /* File offset that corresponds to data[0]. */
30         off_t offset;
        /*
         * Upper bound used by write_file() when copying into data;
         * presumably the allocated size of the buffer (the allocation
         * itself is not visible in this part of the file).
         */
31         size_t alloc_size;
        /* Number of valid cached bytes, starting at data[0]. */
32         size_t data_size;
        /* The cache buffer. */
33         char *data;
34 };
35
/*
 * Forward declaration: write_file() calls this on the first write to an
 * exclusively oplocked file, passing the file's current st_ex_size.
 */
36 static bool setup_write_cache(files_struct *, off_t);
37
38 /****************************************************************************
39  Read from write cache if we can.
40 ****************************************************************************/
41
42 static bool read_from_write_cache(files_struct *fsp,char *data,off_t pos,size_t n)
43 {
44         struct write_cache *wcp = fsp->wcp;
45
46         if(!wcp) {
47                 return False;
48         }
49
50         if( n > wcp->data_size || pos < wcp->offset || pos + n > wcp->offset + wcp->data_size) {
51                 return False;
52         }
53
54         memcpy(data, wcp->data + (pos - wcp->offset), n);
55
56         DO_PROFILE_INC(writecache_read_hits);
57
58         return True;
59 }
60
61 /****************************************************************************
62  Read from a file.
63 ****************************************************************************/
64
65 ssize_t read_file(files_struct *fsp,char *data,off_t pos,size_t n)
66 {
67         ssize_t ret = 0;
68
69         /* you can't read from print files */
70         if (fsp->print_file) {
71                 errno = EBADF;
72                 return -1;
73         }
74
75         /*
76          * Serve from write cache if we can.
77          */
78
79         if(read_from_write_cache(fsp, data, pos, n)) {
80                 fsp->fh->pos = pos + n;
81                 fsp->fh->position_information = fsp->fh->pos;
82                 return n;
83         }
84
85         flush_write_cache(fsp, READ_FLUSH);
86
87         fsp->fh->pos = pos;
88
89         if (n > 0) {
90                 ret = SMB_VFS_PREAD(fsp,data,n,pos);
91
92                 if (ret == -1) {
93                         return -1;
94                 }
95         }
96
97         DEBUG(10,("read_file (%s): pos = %.0f, size = %lu, returned %lu\n",
98                   fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
99
100         fsp->fh->pos += ret;
101         fsp->fh->position_information = fsp->fh->pos;
102
103         return(ret);
104 }
105
106 /****************************************************************************
107  *Really* write to a file.
108 ****************************************************************************/
109
110 static ssize_t real_write_file(struct smb_request *req,
111                                 files_struct *fsp,
112                                 const char *data,
113                                 off_t pos,
114                                 size_t n)
115 {
116         ssize_t ret;
117
118         if (pos == -1) {
119                 ret = vfs_write_data(req, fsp, data, n);
120         } else {
121                 fsp->fh->pos = pos;
122                 if (pos && lp_strict_allocate(SNUM(fsp->conn) &&
123                                 !fsp->is_sparse)) {
124                         if (vfs_fill_sparse(fsp, pos) == -1) {
125                                 return -1;
126                         }
127                 }
128                 ret = vfs_pwrite_data(req, fsp, data, n, pos);
129         }
130
131         DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
132                   fsp_str_dbg(fsp), (double)pos, (unsigned long)n, (long)ret));
133
134         if (ret != -1) {
135                 fsp->fh->pos += ret;
136
137 /* Yes - this is correct - writes don't update this. JRA. */
138 /* Found by Samba4 tests. */
139 #if 0
140                 fsp->position_information = fsp->pos;
141 #endif
142         }
143
144         return ret;
145 }
146
147 /****************************************************************************
148  File size cache change.
149  Updates size on disk but doesn't flush the cache.
150 ****************************************************************************/
151
152 static int wcp_file_size_change(files_struct *fsp)
153 {
154         int ret;
155         struct write_cache *wcp = fsp->wcp;
156
157         wcp->file_size = wcp->offset + wcp->data_size;
158         ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
159         if (ret == -1) {
160                 DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f "
161                          "error %s\n", fsp_str_dbg(fsp),
162                          (double)wcp->file_size, strerror(errno)));
163         }
164         return ret;
165 }
166
167 void update_write_time_handler(struct event_context *ctx,
168                                       struct timed_event *te,
169                                       struct timeval now,
170                                       void *private_data)
171 {
172         files_struct *fsp = (files_struct *)private_data;
173
174         DEBUG(5, ("Update write time on %s\n", fsp_str_dbg(fsp)));
175
176         /* change the write time in the open file db. */
177         (void)set_write_time(fsp->file_id, timespec_current());
178
179         /* And notify. */
180         notify_fname(fsp->conn, NOTIFY_ACTION_MODIFIED,
181                      FILE_NOTIFY_CHANGE_LAST_WRITE, fsp->fsp_name->base_name);
182
183         /* Remove the timed event handler. */
184         TALLOC_FREE(fsp->update_write_time_event);
185 }
186
187 /*********************************************************
188  Schedule a write time update for WRITE_TIME_UPDATE_USEC_DELAY
189  in the future.
190 *********************************************************/
191
192 void trigger_write_time_update(struct files_struct *fsp)
193 {
194         int delay;
195
196         if (fsp->posix_open) {
197                 /* Don't use delayed writes on POSIX files. */
198                 return;
199         }
200
201         if (fsp->write_time_forced) {
202                 /* No point - "sticky" write times
203                  * in effect.
204                  */
205                 return;
206         }
207
208         /* We need to remember someone did a write
209          * and update to current time on close. */
210
211         fsp->update_write_time_on_close = true;
212
213         if (fsp->update_write_time_triggered) {
214                 /*
215                  * We only update the write time after 2 seconds
216                  * on the first normal write. After that
217                  * no other writes affect this until close.
218                  */
219                 return;
220         }
221         fsp->update_write_time_triggered = true;
222
223         delay = lp_parm_int(SNUM(fsp->conn),
224                             "smbd", "writetimeupdatedelay",
225                             WRITE_TIME_UPDATE_USEC_DELAY);
226
227         DEBUG(5, ("Update write time %d usec later on %s\n",
228                   delay, fsp_str_dbg(fsp)));
229
230         /* trigger the update 2 seconds later */
231         fsp->update_write_time_event =
232                 tevent_add_timer(fsp->conn->sconn->ev_ctx, NULL,
233                                  timeval_current_ofs_usec(delay),
234                                  update_write_time_handler, fsp);
235 }
236
237 void trigger_write_time_update_immediate(struct files_struct *fsp)
238 {
239         struct smb_file_time ft;
240
241         if (fsp->posix_open) {
242                 /* Don't use delayed writes on POSIX files. */
243                 return;
244         }
245
246         if (fsp->write_time_forced) {
247                 /*
248                  * No point - "sticky" write times
249                  * in effect.
250                  */
251                 return;
252         }
253
254         TALLOC_FREE(fsp->update_write_time_event);
255         DEBUG(5, ("Update write time immediate on %s\n",
256                   fsp_str_dbg(fsp)));
257
258         /* After an immediate update, reset the trigger. */
259         fsp->update_write_time_triggered = true;
260         fsp->update_write_time_on_close = false;
261
262         ZERO_STRUCT(ft);
263         ft.mtime = timespec_current();
264
265         /* Update the time in the open file db. */
266         (void)set_write_time(fsp->file_id, ft.mtime);
267
268         /* Now set on disk - takes care of notify. */
269         (void)smb_set_file_time(fsp->conn, fsp, fsp->fsp_name, &ft, false);
270 }
271
272 void mark_file_modified(files_struct *fsp)
273 {
274         int dosmode;
275
276         if (fsp->modified) {
277                 return;
278         }
279
280         fsp->modified = true;
281
282         if (SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st) != 0) {
283                 return;
284         }
285         trigger_write_time_update(fsp);
286
287         if (fsp->posix_open) {
288                 return;
289         }
290         if (!(lp_store_dos_attributes(SNUM(fsp->conn)) ||
291               MAP_ARCHIVE(fsp->conn))) {
292                 return;
293         }
294
295         dosmode = dos_mode(fsp->conn, fsp->fsp_name);
296         if (IS_DOS_ARCHIVE(dosmode)) {
297                 return;
298         }
299         file_set_dosmode(fsp->conn, fsp->fsp_name,
300                          dosmode | FILE_ATTRIBUTE_ARCHIVE, NULL, false);
301 }
302
303 /****************************************************************************
304  Write to a file.
305 ****************************************************************************/
306
307 ssize_t write_file(struct smb_request *req,
308                         files_struct *fsp,
309                         const char *data,
310                         off_t pos,
311                         size_t n)
312 {
313         struct write_cache *wcp = fsp->wcp;
314         ssize_t total_written = 0;
315         int write_path = -1;
316
317         if (fsp->print_file) {
318                 uint32_t t;
319                 int ret;
320
321                 ret = print_spool_write(fsp, data, n, pos, &t);
322                 if (ret) {
323                         errno = ret;
324                         return -1;
325                 }
326                 return t;
327         }
328
329         if (!fsp->can_write) {
330                 errno = EPERM;
331                 return -1;
332         }
333
334         /*
335          * If this is the first write and we have an exclusive oplock
336          * then setup the write cache.
337          */
338
339         if (!fsp->modified &&
340             EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) &&
341             (wcp == NULL)) {
342                 setup_write_cache(fsp, fsp->fsp_name->st.st_ex_size);
343                 wcp = fsp->wcp;
344         }
345
346         mark_file_modified(fsp);
347
348 #ifdef WITH_PROFILE
349         DO_PROFILE_INC(writecache_total_writes);
350         if (!fsp->oplock_type) {
351                 DO_PROFILE_INC(writecache_non_oplock_writes);
352         }
353 #endif
354
355         /*
356          * If this file is level II oplocked then we need
357          * to grab the shared memory lock and inform all
358          * other files with a level II lock that they need
359          * to flush their read caches. We keep the lock over
360          * the shared memory area whilst doing this.
361          */
362
363         /* This should actually be improved to span the write. */
364         contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
365         contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
366
367 #ifdef WITH_PROFILE
368         if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
369                 DEBUG(3,("WRITECACHE: initwrites=%u abutted=%u total=%u \
370 nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
371                         profile_p->writecache_init_writes,
372                         profile_p->writecache_abutted_writes,
373                         profile_p->writecache_total_writes,
374                         profile_p->writecache_non_oplock_writes,
375                         profile_p->writecache_allocated_write_caches,
376                         profile_p->writecache_num_write_caches,
377                         profile_p->writecache_direct_writes,
378                         profile_p->writecache_num_perfect_writes,
379                         profile_p->writecache_read_hits ));
380
381                 DEBUG(3,("WRITECACHE: Flushes SEEK=%d, READ=%d, WRITE=%d, READRAW=%d, OPLOCK=%d, CLOSE=%d, SYNC=%d\n",
382                         profile_p->writecache_flushed_writes[SEEK_FLUSH],
383                         profile_p->writecache_flushed_writes[READ_FLUSH],
384                         profile_p->writecache_flushed_writes[WRITE_FLUSH],
385                         profile_p->writecache_flushed_writes[READRAW_FLUSH],
386                         profile_p->writecache_flushed_writes[OPLOCK_RELEASE_FLUSH],
387                         profile_p->writecache_flushed_writes[CLOSE_FLUSH],
388                         profile_p->writecache_flushed_writes[SYNC_FLUSH] ));
389         }
390 #endif
391
392         if (wcp && req->unread_bytes) {
393                 /* If we're using receivefile don't
394                  * deal with a write cache.
395                  */
396                 flush_write_cache(fsp, WRITE_FLUSH);
397                 delete_write_cache(fsp);
398                 wcp = NULL;
399         }
400
401         if(!wcp) {
402                 DO_PROFILE_INC(writecache_direct_writes);
403                 total_written = real_write_file(req, fsp, data, pos, n);
404                 return total_written;
405         }
406
407         DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f "
408                  "wcp->data_size=%u\n", fsp_str_dbg(fsp), fsp->fh->fd,
409                  (double)pos, (unsigned int)n, (double)wcp->offset,
410                  (unsigned int)wcp->data_size));
411
412         fsp->fh->pos = pos + n;
413
414         if ((n == 1) && (data[0] == '\0') && (pos > wcp->file_size)) {
415                 int ret;
416
417                 /*
418                  * This is a 1-byte write of a 0 beyond the EOF and
419                  * thus implicitly also beyond the current active
420                  * write cache, the typical file-extending (and
421                  * allocating, but we're using the write cache here)
422                  * write done by Windows. We just have to ftruncate
423                  * the file and rely on posix semantics to return
424                  * zeros for non-written file data that is within the
425                  * file length.
426                  *
427                  * We can not use wcp_file_size_change here because we
428                  * might have an existing write cache, and
429                  * wcp_file_size_change assumes a change to just the
430                  * end of the current write cache.
431                  */
432
433                 wcp->file_size = pos + 1;
434                 ret = SMB_VFS_FTRUNCATE(fsp, wcp->file_size);
435                 if (ret == -1) {
436                         DEBUG(0,("wcp_file_size_change (%s): ftruncate of size %.0f"
437                                  "error %s\n", fsp_str_dbg(fsp),
438                                  (double)wcp->file_size, strerror(errno)));
439                         return -1;
440                 }
441                 return 1;
442         }
443
444
445         /*
446          * If we have active cache and it isn't contiguous then we flush.
447          * NOTE: There is a small problem with running out of disk ....
448          */
449
450         if (wcp->data_size) {
451                 bool cache_flush_needed = False;
452
453                 if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
454       
455                         /* ASCII art.... JRA.
456
457       +--------------+-----
458       | Cached data  | Rest of allocated cache buffer....
459       +--------------+-----
460
461             +-------------------+
462             | Data to write     |
463             +-------------------+
464
465                         */
466
467                         /*
468                          * Start of write overlaps or abuts the existing data.
469                          */
470
471                         size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
472
473                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
474
475                         /*
476                          * Update the current buffer size with the new data.
477                          */
478
479                         if(pos + data_used > wcp->offset + wcp->data_size) {
480                                 wcp->data_size = pos + data_used - wcp->offset;
481                         }
482
483                         /*
484                          * Update the file size if changed.
485                          */
486
487                         if (wcp->offset + wcp->data_size > wcp->file_size) {
488                                 if (wcp_file_size_change(fsp) == -1) {
489                                         return -1;
490                                 }
491                         }
492
493                         /*
494                          * If we used all the data then
495                          * return here.
496                          */
497
498                         if(n == data_used) {
499                                 return n;
500                         } else {
501                                 cache_flush_needed = True;
502                         }
503                         /*
504                          * Move the start of data forward by the amount used,
505                          * cut down the amount left by the same amount.
506                          */
507
508                         data += data_used;
509                         pos += data_used;
510                         n -= data_used;
511
512                         DO_PROFILE_INC(writecache_abutted_writes);
513                         total_written = data_used;
514
515                         write_path = 1;
516
517                 } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
518                                         (pos + n <= wcp->offset + wcp->alloc_size)) {
519
520                         /* ASCII art.... JRA.
521
522                         +---------------+
523                         | Cache buffer  |
524                         +---------------+
525
526             +-------------------+
527             | Data to write     |
528             +-------------------+
529
530                         */
531
532                         /*
533                          * End of write overlaps the existing data.
534                          */
535
536                         size_t data_used = pos + n - wcp->offset;
537
538                         memcpy(wcp->data, data + n - data_used, data_used);
539
540                         /*
541                          * Update the current buffer size with the new data.
542                          */
543
544                         if(pos + n > wcp->offset + wcp->data_size) {
545                                 wcp->data_size = pos + n - wcp->offset;
546                         }
547
548                         /*
549                          * Update the file size if changed.
550                          */
551
552                         if (wcp->offset + wcp->data_size > wcp->file_size) {
553                                 if (wcp_file_size_change(fsp) == -1) {
554                                         return -1;
555                                 }
556                         }
557
558                         /*
559                          * We don't need to move the start of data, but we
560                          * cut down the amount left by the amount used.
561                          */
562
563                         n -= data_used;
564
565                         /*
566                          * We cannot have used all the data here.
567                          */
568
569                         cache_flush_needed = True;
570
571                         DO_PROFILE_INC(writecache_abutted_writes);
572                         total_written = data_used;
573
574                         write_path = 2;
575
576                 } else if ( (pos >= wcp->file_size) && 
577                                         (wcp->offset + wcp->data_size == wcp->file_size) &&
578                                         (pos > wcp->offset + wcp->data_size) && 
579                                         (pos < wcp->offset + wcp->alloc_size) ) {
580
581                         /* ASCII art.... JRA.
582
583                        End of file ---->|
584
585                         +---------------+---------------+
586                         | Cached data   | Cache buffer  |
587                         +---------------+---------------+
588
589                                               +-------------------+
590                                               | Data to write     |
591                                               +-------------------+
592
593                         */
594
595                         /*
596                          * Non-contiguous write part of which fits within
597                          * the cache buffer and is extending the file
598                          * and the cache contents reflect the current
599                          * data up to the current end of the file.
600                          */
601
602                         size_t data_used;
603
604                         if(pos + n <= wcp->offset + wcp->alloc_size) {
605                                 data_used = n;
606                         } else {
607                                 data_used = wcp->offset + wcp->alloc_size - pos;
608                         }
609
610                         /*
611                          * Fill in the non-continuous area with zeros.
612                          */
613
614                         memset(wcp->data + wcp->data_size, '\0',
615                                 pos - (wcp->offset + wcp->data_size) );
616
617                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
618
619                         /*
620                          * Update the current buffer size with the new data.
621                          */
622
623                         if(pos + data_used > wcp->offset + wcp->data_size) {
624                                 wcp->data_size = pos + data_used - wcp->offset;
625                         }
626
627                         /*
628                          * Update the file size if changed.
629                          */
630
631                         if (wcp->offset + wcp->data_size > wcp->file_size) {
632                                 if (wcp_file_size_change(fsp) == -1) {
633                                         return -1;
634                                 }
635                         }
636
637                         /*
638                          * If we used all the data then
639                          * return here.
640                          */
641
642                         if(n == data_used) {
643                                 return n;
644                         } else {
645                                 cache_flush_needed = True;
646                         }
647
648                         /*
649                          * Move the start of data forward by the amount used,
650                          * cut down the amount left by the same amount.
651                          */
652
653                         data += data_used;
654                         pos += data_used;
655                         n -= data_used;
656
657                         DO_PROFILE_INC(writecache_abutted_writes);
658                         total_written = data_used;
659
660                         write_path = 3;
661
662                 } else if ( (pos >= wcp->file_size) &&
663                             (n == 1) &&
664                             (wcp->file_size == wcp->offset + wcp->data_size) &&
665                             (pos < wcp->file_size + wcp->alloc_size)) {
666
667                         /*
668
669                 End of file ---->|
670
671                  +---------------+---------------+
672                  | Cached data   | Cache buffer  |
673                  +---------------+---------------+
674
675                                  |<------- allocated size ---------------->|
676
677                                                          +--------+
678                                                          | 1 Byte |
679                                                          +--------+
680
681                         MS-Office seems to do this a lot to determine if there's enough
682                         space on the filesystem to write a new file.
683
684                         Change to :
685
686                 End of file ---->|
687                                  +-----------------------+--------+
688                                  | Zeroed Cached data    | 1 Byte |
689                                  +-----------------------+--------+
690                         */
691
692                         flush_write_cache(fsp, WRITE_FLUSH);
693                         wcp->offset = wcp->file_size;
694                         wcp->data_size = pos - wcp->file_size + 1;
695                         memset(wcp->data, '\0', wcp->data_size);
696                         memcpy(wcp->data + wcp->data_size-1, data, 1);
697
698                         /*
699                          * Update the file size if changed.
700                          */
701
702                         if (wcp->offset + wcp->data_size > wcp->file_size) {
703                                 if (wcp_file_size_change(fsp) == -1) {
704                                         return -1;
705                                 }
706                         }
707
708                         return n;
709
710                 } else {
711
712                         /* ASCII art..... JRA.
713
714    Case 1).
715
716                         +---------------+---------------+
717                         | Cached data   | Cache buffer  |
718                         +---------------+---------------+
719
720                                                               +-------------------+
721                                                               | Data to write     |
722                                                               +-------------------+
723
724    Case 2).
725
726                            +---------------+---------------+
727                            | Cached data   | Cache buffer  |
728                            +---------------+---------------+
729
730    +-------------------+
731    | Data to write     |
732    +-------------------+
733
734     Case 3).
735
736                            +---------------+---------------+
737                            | Cached data   | Cache buffer  |
738                            +---------------+---------------+
739
740                   +-----------------------------------------------------+
741                   | Data to write                                       |
742                   +-----------------------------------------------------+
743
744                   */
745
746                         /*
747                          * Write is bigger than buffer, or there is no overlap on the
748                          * low or high ends.
749                          */
750
751                         DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
752 len = %u\n",fsp->fh->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
753
754                         /*
755                          * If write would fit in the cache, and is larger than
756                          * the data already in the cache, flush the cache and
757                          * preferentially copy the new data into it. Otherwise
758                          * just write the data directly.
759                          */
760
761                         if ( n <= wcp->alloc_size && n > wcp->data_size) {
762                                 cache_flush_needed = True;
763                         } else {
764                                 ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
765
766                                 /*
767                                  * If the write overlaps the entire cache, then
768                                  * discard the current contents of the cache.
769                                  * Fix from Rasmus Borup Hansen rbh@math.ku.dk.
770                                  */
771
772                                 if ((pos <= wcp->offset) &&
773                                                 (pos + n >= wcp->offset + wcp->data_size) ) {
774                                         DEBUG(9,("write_file: discarding overwritten write \
775 cache: fd = %d, off=%.0f, size=%u\n", fsp->fh->fd, (double)wcp->offset, (unsigned int)wcp->data_size ));
776                                         wcp->data_size = 0;
777                                 }
778
779                                 DO_PROFILE_INC(writecache_direct_writes);
780                                 if (ret == -1) {
781                                         return ret;
782                                 }
783
784                                 if (pos + ret > wcp->file_size) {
785                                         wcp->file_size = pos + ret;
786                                 }
787
788                                 return ret;
789                         }
790
791                         write_path = 4;
792
793                 }
794
795                 if (cache_flush_needed) {
796                         DEBUG(3,("WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
797 n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
798                                 write_path, fsp->fh->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
799                                 (double)wcp->offset, (unsigned int)wcp->data_size ));
800
801                         flush_write_cache(fsp, WRITE_FLUSH);
802                 }
803         }
804
805         /*
806          * If the write request is bigger than the cache
807          * size, write it all out.
808          */
809
810         if (n > wcp->alloc_size ) {
811                 ssize_t ret = real_write_file(NULL,fsp, data, pos, n);
812                 if (ret == -1) {
813                         return -1;
814                 }
815
816                 if (pos + ret > wcp->file_size) {
817                         wcp->file_size = pos + n;
818                 }
819
820                 DO_PROFILE_INC(writecache_direct_writes);
821                 return total_written + n;
822         }
823
824         /*
825          * If there's any data left, cache it.
826          */
827
828         if (n) {
829 #ifdef WITH_PROFILE
830                 if (wcp->data_size) {
831                         DO_PROFILE_INC(writecache_abutted_writes);
832                 } else {
833                         DO_PROFILE_INC(writecache_init_writes);
834                 }
835 #endif
836
837                 if ((wcp->data_size == 0)
838                     && (pos > wcp->file_size)
839                     && (pos + n <= wcp->file_size + wcp->alloc_size)) {
840                         /*
841                          * This is a write completely beyond the
842                          * current EOF, but within reach of the write
843                          * cache. We expect fill-up writes pretty
844                          * soon, so it does not make sense to start
845                          * the write cache at the current
846                          * offset. These fill-up writes would trigger
847                          * separate pwrites or even unnecessary cache
848                          * flushes because they overlap if this is a
849                          * one-byte allocating write.
850                          */
851                         wcp->offset = wcp->file_size;
852                         wcp->data_size = pos - wcp->file_size;
853                         memset(wcp->data, 0, wcp->data_size);
854                 }
855
856                 memcpy(wcp->data+wcp->data_size, data, n);
857                 if (wcp->data_size == 0) {
858                         wcp->offset = pos;
859                         DO_PROFILE_INC(writecache_num_write_caches);
860                 }
861                 wcp->data_size += n;
862
863                 /*
864                  * Update the file size if changed.
865                  */
866
867                 if (wcp->offset + wcp->data_size > wcp->file_size) {
868                         if (wcp_file_size_change(fsp) == -1) {
869                                 return -1;
870                         }
871                 }
872                 DEBUG(9,("wcp->offset = %.0f wcp->data_size = %u cache return %u\n",
873                         (double)wcp->offset, (unsigned int)wcp->data_size, (unsigned int)n));
874
875                 total_written += n;
876                 return total_written; /* .... that's a write :) */
877         }
878   
879         return total_written;
880 }
881
882 /****************************************************************************
883  Delete the write cache structure.
884 ****************************************************************************/
885
886 void delete_write_cache(files_struct *fsp)
887 {
888         struct write_cache *wcp;
889
890         if(!fsp) {
891                 return;
892         }
893
894         if(!(wcp = fsp->wcp)) {
895                 return;
896         }
897
898         DO_PROFILE_DEC(writecache_allocated_write_caches);
899         allocated_write_caches--;
900
901         SMB_ASSERT(wcp->data_size == 0);
902
903         SAFE_FREE(wcp->data);
904         SAFE_FREE(fsp->wcp);
905
906         DEBUG(10,("delete_write_cache: File %s deleted write cache\n",
907                   fsp_str_dbg(fsp)));
908 }
909
910 /****************************************************************************
911  Setup the write cache structure.
912 ****************************************************************************/
913
914 static bool setup_write_cache(files_struct *fsp, off_t file_size)
915 {
916         ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn));
917         struct write_cache *wcp;
918
919         if (allocated_write_caches >= MAX_WRITE_CACHES) {
920                 return False;
921         }
922
923         if(alloc_size == 0 || fsp->wcp) {
924                 return False;
925         }
926
927         if((wcp = SMB_MALLOC_P(struct write_cache)) == NULL) {
928                 DEBUG(0,("setup_write_cache: malloc fail.\n"));
929                 return False;
930         }
931
932         wcp->file_size = file_size;
933         wcp->offset = 0;
934         wcp->alloc_size = alloc_size;
935         wcp->data_size = 0;
936         if((wcp->data = (char *)SMB_MALLOC(wcp->alloc_size)) == NULL) {
937                 DEBUG(0,("setup_write_cache: malloc fail for buffer size %u.\n",
938                         (unsigned int)wcp->alloc_size ));
939                 SAFE_FREE(wcp);
940                 return False;
941         }
942
943         memset(wcp->data, '\0', wcp->alloc_size );
944
945         fsp->wcp = wcp;
946         DO_PROFILE_INC(writecache_allocated_write_caches);
947         allocated_write_caches++;
948
949         DEBUG(10,("setup_write_cache: File %s allocated write cache size %lu\n",
950                   fsp_str_dbg(fsp), (unsigned long)wcp->alloc_size));
951
952         return True;
953 }
954
955 /****************************************************************************
956  Cope with a size change.
957 ****************************************************************************/
958
959 void set_filelen_write_cache(files_struct *fsp, off_t file_size)
960 {
961         if(fsp->wcp) {
962                 /* The cache *must* have been flushed before we do this. */
963                 if (fsp->wcp->data_size != 0) {
964                         char *msg;
965                         if (asprintf(&msg, "set_filelen_write_cache: size change "
966                                  "on file %s with write cache size = %lu\n",
967                                  fsp->fsp_name->base_name,
968                                  (unsigned long)fsp->wcp->data_size) != -1) {
969                                 smb_panic(msg);
970                         } else {
971                                 smb_panic("set_filelen_write_cache");
972                         }
973                 }
974                 fsp->wcp->file_size = file_size;
975         }
976 }
977
978 /*******************************************************************
979  Flush a write cache struct to disk.
980 ********************************************************************/
981
982 ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
983 {
984         struct write_cache *wcp = fsp->wcp;
985         size_t data_size;
986         ssize_t ret;
987
988         if(!wcp || !wcp->data_size) {
989                 return 0;
990         }
991
992         data_size = wcp->data_size;
993         wcp->data_size = 0;
994
995         DO_PROFILE_DEC_INC(writecache_num_write_caches,writecache_flushed_writes[reason]);
996
997         DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
998                 fsp->fh->fd, (double)wcp->offset, (unsigned int)data_size));
999
1000 #ifdef WITH_PROFILE
1001         if(data_size == wcp->alloc_size) {
1002                 DO_PROFILE_INC(writecache_num_perfect_writes);
1003         }
1004 #endif
1005
1006         ret = real_write_file(NULL, fsp, wcp->data, wcp->offset, data_size);
1007
1008         /*
1009          * Ensure file size if kept up to date if write extends file.
1010          */
1011
1012         if ((ret != -1) && (wcp->offset + ret > wcp->file_size)) {
1013                 wcp->file_size = wcp->offset + ret;
1014         }
1015
1016         return ret;
1017 }
1018
1019 /*******************************************************************
1020 sync a file
1021 ********************************************************************/
1022
1023 NTSTATUS sync_file(connection_struct *conn, files_struct *fsp, bool write_through)
1024 {
1025         if (fsp->fh->fd == -1)
1026                 return NT_STATUS_INVALID_HANDLE;
1027
1028         if (lp_strict_sync(SNUM(conn)) &&
1029             (lp_syncalways(SNUM(conn)) || write_through)) {
1030                 int ret = flush_write_cache(fsp, SYNC_FLUSH);
1031                 if (ret == -1) {
1032                         return map_nt_error_from_unix(errno);
1033                 }
1034                 ret = SMB_VFS_FSYNC(fsp);
1035                 if (ret == -1) {
1036                         return map_nt_error_from_unix(errno);
1037                 }
1038         }
1039         return NT_STATUS_OK;
1040 }
1041
1042 /************************************************************
1043  Perform a stat whether a valid fd or not.
1044 ************************************************************/
1045
1046 int fsp_stat(files_struct *fsp)
1047 {
1048         if (fsp->fh->fd == -1) {
1049                 if (fsp->posix_open) {
1050                         return SMB_VFS_LSTAT(fsp->conn, fsp->fsp_name);
1051                 } else {
1052                         return SMB_VFS_STAT(fsp->conn, fsp->fsp_name);
1053                 }
1054         } else {
1055                 return SMB_VFS_FSTAT(fsp, &fsp->fsp_name->st);
1056         }
1057 }