merge from samba_3_0
[samba.git] / source3 / smbd / fileio.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 1.9.
4    read/write to a files_struct
5    Copyright (C) Andrew Tridgell 1992-1998
6    Copyright (C) Jeremy Allison 2000-2002. - write cache.
7    
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 */
22
23 #include "includes.h"
24
25 static BOOL setup_write_cache(files_struct *, SMB_OFF_T);
26
27 /****************************************************************************
28  Seek a file. Try to avoid the seek if possible.
29 ****************************************************************************/
30
31 static SMB_OFF_T seek_file(files_struct *fsp,SMB_OFF_T pos)
32 {
33         SMB_OFF_T seek_ret;
34
35         seek_ret = fsp->conn->vfs_ops.lseek(fsp,fsp->fd,pos,SEEK_SET);
36
37         if(seek_ret == -1) {
38                 DEBUG(0,("seek_file: (%s) sys_lseek failed. Error was %s\n",
39                         fsp->fsp_name, strerror(errno) ));
40                 fsp->pos = -1;
41                 return -1;
42         }
43
44         fsp->pos = seek_ret;
45
46         DEBUG(10,("seek_file (%s): requested pos = %.0f, new pos = %.0f\n",
47                 fsp->fsp_name, (double)pos, (double)fsp->pos ));
48
49         return(fsp->pos);
50 }
51
52 /****************************************************************************
53  Read from write cache if we can.
54 ****************************************************************************/
55
56
57 static BOOL read_from_write_cache(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
58 {
59         write_cache *wcp = fsp->wcp;
60
61         if(!wcp)
62                 return False;
63
64         if(n > wcp->data_size || pos < wcp->offset || pos + n > wcp->offset + wcp->data_size)
65                 return False;
66
67         memcpy(data, wcp->data + (pos - wcp->offset), n);
68
69         DO_PROFILE_INC(writecache_read_hits);
70
71         return True;
72 }
73
74 /****************************************************************************
75  Read from a file.
76 ****************************************************************************/
77
78 ssize_t read_file(files_struct *fsp,char *data,SMB_OFF_T pos,size_t n)
79 {
80         ssize_t ret=0,readret;
81
82         /* you can't read from print files */
83         if (fsp->print_file)
84                 return -1;
85
86         /*
87          * Serve from write cache if we can.
88          */
89
90         if(read_from_write_cache(fsp, data, pos, n))
91                 return n;
92
93         flush_write_cache(fsp, READ_FLUSH);
94
95         if (seek_file(fsp,pos) == -1) {
96                 DEBUG(3,("read_file: Failed to seek to %.0f\n",(double)pos));
97                 return(ret);
98         }
99   
100         if (n > 0) {
101 #ifdef DMF_FIX
102                 int numretries = 3;
103 tryagain:
104                 readret = fsp->conn->vfs_ops.read(fsp,fsp->fd,data,n);
105                 if (readret == -1) {
106                         if ((errno == EAGAIN) && numretries) {
107                                 DEBUG(3,("read_file EAGAIN retry in 10 seconds\n"));
108                                 (void)sleep(10);
109                                 --numretries;
110                                 goto tryagain;
111                         }
112                         return -1;
113                 }
114 #else /* NO DMF fix. */
115                 readret = fsp->conn->vfs_ops.read(fsp,fsp->fd,data,n);
116                 if (readret == -1)
117                         return -1;
118 #endif
119                 if (readret > 0)
120                         ret += readret;
121         }
122
123         DEBUG(10,("read_file (%s): pos = %.0f, size = %lu, returned %lu\n",
124                 fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
125
126         return(ret);
127 }
128
/*
 * Number of write cache buffers currently allocated. Incremented by
 * setup_write_cache(), decremented by delete_write_cache(), and compared
 * against MAX_WRITE_CACHES before a new cache is created.
 */
static unsigned int allocated_write_caches = 0;
131
132 /****************************************************************************
133  *Really* write to a file.
134 ****************************************************************************/
135
136 static ssize_t real_write_file(files_struct *fsp,char *data,SMB_OFF_T pos, size_t n)
137 {
138         ssize_t ret;
139
140         if ((pos != -1) && (seek_file(fsp,pos) == -1))
141                 return -1;
142
143         ret = vfs_write_data(fsp,data,n);
144
145         DEBUG(10,("real_write_file (%s): pos = %.0f, size = %lu, returned %ld\n",
146                 fsp->fsp_name, (double)pos, (unsigned long)n, (long)ret ));
147
148         return ret;
149 }
150
151 /****************************************************************************
152 write to a file
153 ****************************************************************************/
154
155 ssize_t write_file(files_struct *fsp, char *data, SMB_OFF_T pos, size_t n)
156 {
157         write_cache *wcp = fsp->wcp;
158         ssize_t total_written = 0;
159         int write_path = -1; 
160
161         if (fsp->print_file)
162                 return print_job_write(SNUM(fsp->conn), fsp->print_jobid, data, n);
163
164         if (!fsp->can_write) {
165                 errno = EPERM;
166                 return(0);
167         }
168
169         if (!fsp->modified) {
170                 SMB_STRUCT_STAT st;
171                 fsp->modified = True;
172
173                 if (fsp->conn->vfs_ops.fstat(fsp,fsp->fd,&st) == 0) {
174                         int dosmode = dos_mode(fsp->conn,fsp->fsp_name,&st);
175                         fsp->size = st.st_size;
176                         if (MAP_ARCHIVE(fsp->conn) && !IS_DOS_ARCHIVE(dosmode))
177                                 file_chmod(fsp->conn,fsp->fsp_name,dosmode | aARCH,&st);
178
179                         /*
180                          * If this is the first write and we have an exclusive oplock then setup
181                          * the write cache.
182                          */
183
184                         if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && !wcp) {
185                                 setup_write_cache(fsp, st.st_size);
186                                 wcp = fsp->wcp;
187                         } 
188                 }  
189         }
190
191 #ifdef WITH_PROFILE
192         DO_PROFILE_INC(writecache_total_writes);
193         if (!fsp->oplock_type) {
194                 DO_PROFILE_INC(writecache_non_oplock_writes);
195         }
196 #endif
197
198         /*
199          * If this file is level II oplocked then we need
200          * to grab the shared memory lock and inform all
201          * other files with a level II lock that they need
202          * to flush their read caches. We keep the lock over
203          * the shared memory area whilst doing this.
204          */
205
206         release_level_2_oplocks_on_change(fsp);
207
208 #ifdef WITH_PROFILE
209         if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
210                 DEBUG(3,("WRITECACHE: initwrites=%u abutted=%u total=%u \
211 nonop=%u allocated=%u active=%u direct=%u perfect=%u readhits=%u\n",
212                         profile_p->writecache_init_writes,
213                         profile_p->writecache_abutted_writes,
214                         profile_p->writecache_total_writes,
215                         profile_p->writecache_non_oplock_writes,
216                         profile_p->writecache_allocated_write_caches,
217                         profile_p->writecache_num_write_caches,
218                         profile_p->writecache_direct_writes,
219                         profile_p->writecache_num_perfect_writes,
220                         profile_p->writecache_read_hits ));
221
222                 DEBUG(3,("WRITECACHE: Flushes SEEK=%d, READ=%d, WRITE=%d, READRAW=%d, OPLOCK=%d, CLOSE=%d, SYNC=%d\n",
223                         profile_p->writecache_flushed_writes[SEEK_FLUSH],
224                         profile_p->writecache_flushed_writes[READ_FLUSH],
225                         profile_p->writecache_flushed_writes[WRITE_FLUSH],
226                         profile_p->writecache_flushed_writes[READRAW_FLUSH],
227                         profile_p->writecache_flushed_writes[OPLOCK_RELEASE_FLUSH],
228                         profile_p->writecache_flushed_writes[CLOSE_FLUSH],
229                         profile_p->writecache_flushed_writes[SYNC_FLUSH] ));
230         }
231 #endif
232
233         if(!wcp) {
234                 DO_PROFILE_INC(writecache_direct_writes);
235                 total_written = real_write_file(fsp, data, pos, n);
236                 if ((total_written != -1) && (pos + total_written > fsp->size))
237                         fsp->size = pos + total_written;
238                 return total_written;
239         }
240
241         DEBUG(9,("write_file (%s)(fd=%d pos=%.0f size=%u) wcp->offset=%.0f wcp->data_size=%u\n",
242                 fsp->fsp_name, fsp->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size));
243
244         /* 
245          * If we have active cache and it isn't contiguous then we flush.
246          * NOTE: There is a small problem with running out of disk ....
247          */
248
249         if (wcp->data_size) {
250
251                 BOOL cache_flush_needed = False;
252
253                 if ((pos >= wcp->offset) && (pos <= wcp->offset + wcp->data_size)) {
254       
255                         /* ASCII art.... JRA.
256
257       +--------------+-----
258       | Cached data  | Rest of allocated cache buffer....
259       +--------------+-----
260
261             +-------------------+
262             | Data to write     |
263             +-------------------+
264
265                         */
266
267                         /*
268                          * Start of write overlaps or abutts the existing data.
269                          */
270
271                         size_t data_used = MIN((wcp->alloc_size - (pos - wcp->offset)), n);
272
273                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
274
275                         /*
276                          * Update the current buffer size with the new data.
277                          */
278
279                         if(pos + data_used > wcp->offset + wcp->data_size)
280                                 wcp->data_size = pos + data_used - wcp->offset;
281
282                         /*
283                          * Update the file size if changed.
284                          */
285
286                         if (wcp->offset + wcp->data_size > wcp->file_size)
287                                 fsp->size = wcp->file_size = wcp->offset + wcp->data_size;
288
289                         /*
290                          * If we used all the data then
291                          * return here.
292                          */
293
294                         if(n == data_used)
295                                 return n;
296                         else
297                                 cache_flush_needed = True;
298
299                         /*
300                          * Move the start of data forward by the amount used,
301                          * cut down the amount left by the same amount.
302                          */
303
304                         data += data_used;
305                         pos += data_used;
306                         n -= data_used;
307
308                         DO_PROFILE_INC(writecache_abutted_writes);
309                         total_written = data_used;
310
311                         write_path = 1;
312
313                 } else if ((pos < wcp->offset) && (pos + n > wcp->offset) && 
314                                         (pos + n <= wcp->offset + wcp->alloc_size)) {
315
316                         /* ASCII art.... JRA.
317
318                         +---------------+
319                         | Cache buffer  |
320                         +---------------+
321
322             +-------------------+
323             | Data to write     |
324             +-------------------+
325
326                         */
327
328                         /*
329                          * End of write overlaps the existing data.
330                          */
331
332                         size_t data_used = pos + n - wcp->offset;
333
334                         memcpy(wcp->data, data + n - data_used, data_used);
335
336                         /*
337                          * Update the current buffer size with the new data.
338                          */
339
340                         if(pos + n > wcp->offset + wcp->data_size)
341                                 wcp->data_size = pos + n - wcp->offset;
342
343                         /*
344                          * Update the file size if changed.
345                          */
346
347                         if (wcp->offset + wcp->data_size > wcp->file_size)
348                                 fsp->size = wcp->file_size = wcp->offset + wcp->data_size;
349
350                         /*
351                          * We don't need to move the start of data, but we
352                          * cut down the amount left by the amount used.
353                          */
354
355                         n -= data_used;
356
357                         /*
358                          * We cannot have used all the data here.
359                          */
360
361                         cache_flush_needed = True;
362
363                         DO_PROFILE_INC(writecache_abutted_writes);
364                         total_written = data_used;
365
366                         write_path = 2;
367
368                 } else if ( (pos >= wcp->file_size) && 
369                                         (wcp->offset + wcp->data_size == wcp->file_size) &&
370                                         (pos > wcp->offset + wcp->data_size) && 
371                                         (pos < wcp->offset + wcp->alloc_size) ) {
372
373                         /* ASCII art.... JRA.
374
375                        End of file ---->|
376
377                         +---------------+---------------+
378                         | Cached data   | Cache buffer  |
379                         +---------------+---------------+
380
381                                               +-------------------+
382                                               | Data to write     |
383                                               +-------------------+
384
385                         */
386
387                         /*
388                          * Non-contiguous write part of which fits within
389                          * the cache buffer and is extending the file
390                          * and the cache contents reflect the current
391                          * data up to the current end of the file.
392                          */
393
394                         size_t data_used;
395
396                         if(pos + n <= wcp->offset + wcp->alloc_size)
397                                 data_used = n;
398                         else
399                                 data_used = wcp->offset + wcp->alloc_size - pos;
400
401                         /*
402                          * Fill in the non-continuous area with zeros.
403                          */
404
405                         memset(wcp->data + wcp->data_size, '\0',
406                                 pos - (wcp->offset + wcp->data_size) );
407
408                         memcpy(wcp->data + (pos - wcp->offset), data, data_used);
409
410                         /*
411                          * Update the current buffer size with the new data.
412                          */
413
414                         if(pos + data_used > wcp->offset + wcp->data_size)
415                                 wcp->data_size = pos + data_used - wcp->offset;
416
417                         /*
418                          * Update the file size if changed.
419                          */
420
421                         if (wcp->offset + wcp->data_size > wcp->file_size)
422                                 fsp->size = wcp->file_size = wcp->offset + wcp->data_size;
423
424                         /*
425                          * If we used all the data then
426                          * return here.
427                          */
428
429                         if(n == data_used)
430                                 return n;
431                         else
432                                 cache_flush_needed = True;
433
434                         /*
435                          * Move the start of data forward by the amount used,
436                          * cut down the amount left by the same amount.
437                          */
438
439                         data += data_used;
440                         pos += data_used;
441                         n -= data_used;
442
443                         DO_PROFILE_INC(writecache_abutted_writes);
444                         total_written = data_used;
445
446                         write_path = 3;
447
448                 } else {
449
450                         /* ASCII art..... JRA.
451
452    Case 1).
453
454                         +---------------+---------------+
455                         | Cached data   | Cache buffer  |
456                         +---------------+---------------+
457
458                                                               +-------------------+
459                                                               | Data to write     |
460                                                               +-------------------+
461
462    Case 2).
463
464                            +---------------+---------------+
465                            | Cached data   | Cache buffer  |
466                            +---------------+---------------+
467
468    +-------------------+
469    | Data to write     |
470    +-------------------+
471
472     Case 3).
473
474                            +---------------+---------------+
475                            | Cached data   | Cache buffer  |
476                            +---------------+---------------+
477
478                   +-----------------------------------------------------+
479                   | Data to write                                       |
480                   +-----------------------------------------------------+
481
482                   */
483
484                         /*
485                          * Write is bigger than buffer, or there is no overlap on the
486                          * low or high ends.
487                          */
488
489                         DEBUG(9,("write_file: non cacheable write : fd = %d, pos = %.0f, len = %u, current cache pos = %.0f \
490 len = %u\n",fsp->fd, (double)pos, (unsigned int)n, (double)wcp->offset, (unsigned int)wcp->data_size ));
491
492                         /*
493                          * Update the file size if needed.
494                          */
495
496                         if(pos + n > wcp->file_size)
497                                 fsp->size = wcp->file_size = pos + n;
498
499                         /*
500                          * If write would fit in the cache, and is larger than
501                          * the data already in the cache, flush the cache and
502                          * preferentially copy the data new data into it. Otherwise
503                          * just write the data directly.
504                          */
505
506                         if ( n <= wcp->alloc_size && n > wcp->data_size) {
507                                 cache_flush_needed = True;
508                         } else {
509                                 ssize_t ret = real_write_file(fsp, data, pos, n);
510
511                                 /*
512                                  * If the write overlaps the entire cache, then
513                                  * discard the current contents of the cache.
514                                  * Fix from Rasmus Borup Hansen rbh@math.ku.dk.
515                                  */
516
517                                 if ((pos <= wcp->offset) &&
518                                                 (pos + n >= wcp->offset + wcp->data_size) ) {
519                                         DEBUG(9,("write_file: discarding overwritten write \
520 cache: fd = %d, off=%.0f, size=%u\n", fsp->fd, (double)wcp->offset, (unsigned int)wcp->data_size ));
521                                         wcp->data_size = 0;
522                                 }
523
524                                 DO_PROFILE_INC(writecache_direct_writes);
525                                 if (ret == -1)
526                                         return ret;
527
528                                 if (pos + ret > wcp->file_size)
529                                         fsp->size = wcp->file_size = pos + ret;
530
531                                 return ret;
532                         }
533
534                         write_path = 4;
535
536                 }
537
538                 if(wcp->data_size > wcp->file_size)
539                         fsp->size = wcp->file_size = wcp->data_size;
540
541                 if (cache_flush_needed) {
542                         DEBUG(3,("WRITE_FLUSH:%d: due to noncontinuous write: fd = %d, size = %.0f, pos = %.0f, \
543 n = %u, wcp->offset=%.0f, wcp->data_size=%u\n",
544                                 write_path, fsp->fd, (double)wcp->file_size, (double)pos, (unsigned int)n,
545                                 (double)wcp->offset, (unsigned int)wcp->data_size ));
546
547                         flush_write_cache(fsp, WRITE_FLUSH);
548                 }
549         }
550
551         /*
552          * If the write request is bigger than the cache
553          * size, write it all out.
554          */
555
556         if (n > wcp->alloc_size ) {
557                 ssize_t ret = real_write_file(fsp, data, pos, n);
558                 if (ret == -1)
559                         return -1;
560
561                 if (pos + ret > wcp->file_size)
562                         fsp->size = wcp->file_size = pos + n;
563
564                 DO_PROFILE_INC(writecache_direct_writes);
565                 return total_written + n;
566         }
567
568         /*
569          * If there's any data left, cache it.
570          */
571
572         if (n) {
573 #ifdef WITH_PROFILE
574                 if (wcp->data_size) {
575                         DO_PROFILE_INC(writecache_abutted_writes);
576                 } else {
577                         DO_PROFILE_INC(writecache_init_writes);
578                 }
579 #endif
580                 memcpy(wcp->data+wcp->data_size, data, n);
581                 if (wcp->data_size == 0) {
582                         wcp->offset = pos;
583                         DO_PROFILE_INC(writecache_num_write_caches);
584                 }
585                 wcp->data_size += n;
586
587                 /*
588                  * Update the file size if changed.
589                  */
590
591                 if (wcp->offset + wcp->data_size > wcp->file_size)
592                         fsp->size = wcp->file_size = wcp->offset + wcp->data_size;
593                 DEBUG(9,("wcp->offset = %.0f wcp->data_size = %u cache return %u\n",
594                         (double)wcp->offset, (unsigned int)wcp->data_size, (unsigned int)n));
595
596                 total_written += n;
597                 return total_written; /* .... that's a write :) */
598         }
599   
600         return total_written;
601 }
602
603 /****************************************************************************
604  Delete the write cache structure.
605 ****************************************************************************/
606
607 void delete_write_cache(files_struct *fsp)
608 {
609         write_cache *wcp;
610
611         if(!fsp)
612                 return;
613
614         if(!(wcp = fsp->wcp))
615                 return;
616
617         DO_PROFILE_DEC(writecache_allocated_write_caches);
618         allocated_write_caches--;
619
620         SMB_ASSERT(wcp->data_size == 0);
621
622         SAFE_FREE(wcp->data);
623         SAFE_FREE(fsp->wcp);
624
625         DEBUG(10,("delete_write_cache: File %s deleted write cache\n", fsp->fsp_name ));
626 }
627
628 /****************************************************************************
629  Setup the write cache structure.
630 ****************************************************************************/
631
632 static BOOL setup_write_cache(files_struct *fsp, SMB_OFF_T file_size)
633 {
634         ssize_t alloc_size = lp_write_cache_size(SNUM(fsp->conn));
635         write_cache *wcp;
636
637         if (allocated_write_caches >= MAX_WRITE_CACHES) 
638                 return False;
639
640         if(alloc_size == 0 || fsp->wcp)
641                 return False;
642
643         if((wcp = (write_cache *)malloc(sizeof(write_cache))) == NULL) {
644                 DEBUG(0,("setup_write_cache: malloc fail.\n"));
645                 return False;
646         }
647
648         wcp->file_size = file_size;
649         wcp->offset = 0;
650         wcp->alloc_size = alloc_size;
651         wcp->data_size = 0;
652         if((wcp->data = malloc(wcp->alloc_size)) == NULL) {
653                 DEBUG(0,("setup_write_cache: malloc fail for buffer size %u.\n",
654                         (unsigned int)wcp->alloc_size ));
655                 SAFE_FREE(wcp);
656                 return False;
657         }
658
659         memset(wcp->data, '\0', wcp->alloc_size );
660
661         fsp->wcp = wcp;
662         DO_PROFILE_INC(writecache_allocated_write_caches);
663         allocated_write_caches++;
664
665         DEBUG(10,("setup_write_cache: File %s allocated write cache size %u\n",
666                 fsp->fsp_name, wcp->alloc_size ));
667
668         return True;
669 }
670
671 /****************************************************************************
672  Cope with a size change.
673 ****************************************************************************/
674
675 void set_filelen_write_cache(files_struct *fsp, SMB_OFF_T file_size)
676 {
677         fsp->size = file_size;
678         if(fsp->wcp) {
679                 /* The cache *must* have been flushed before we do this. */
680                 if (fsp->wcp->data_size != 0) {
681                         pstring msg;
682                         slprintf(msg, sizeof(msg)-1, "set_filelen_write_cache: size change \
683 on file %s with write cache size = %u\n", fsp->fsp_name, fsp->wcp->data_size );
684                         smb_panic(msg);
685                 }
686                 fsp->wcp->file_size = file_size;
687         }
688 }
689
690 /*******************************************************************
691  Flush a write cache struct to disk.
692 ********************************************************************/
693
694 ssize_t flush_write_cache(files_struct *fsp, enum flush_reason_enum reason)
695 {
696         write_cache *wcp = fsp->wcp;
697         size_t data_size;
698         ssize_t ret;
699
700         if(!wcp || !wcp->data_size)
701                 return 0;
702
703         data_size = wcp->data_size;
704         wcp->data_size = 0;
705
706         DO_PROFILE_DEC_INC(writecache_num_write_caches,writecache_flushed_writes[reason]);
707
708         DEBUG(9,("flushing write cache: fd = %d, off=%.0f, size=%u\n",
709                 fsp->fd, (double)wcp->offset, (unsigned int)data_size));
710
711 #ifdef WITH_PROFILE
712         if(data_size == wcp->alloc_size)
713                 DO_PROFILE_INC(writecache_num_perfect_writes);
714 #endif
715
716         ret = real_write_file(fsp, wcp->data, wcp->offset, data_size);
717
718         /*
719          * Ensure file size if kept up to date if write extends file.
720          */
721
722         if ((ret != -1) && (wcp->offset + ret > wcp->file_size))
723                 wcp->file_size = wcp->offset + ret;
724
725         return ret;
726 }
727
728 /*******************************************************************
729 sync a file
730 ********************************************************************/
731
732 void sync_file(connection_struct *conn, files_struct *fsp)
733 {
734         if(lp_strict_sync(SNUM(conn)) && fsp->fd != -1) {
735                 flush_write_cache(fsp, SYNC_FLUSH);
736                 conn->vfs_ops.fsync(fsp,fsp->fd);
737         }
738 }
739
740
741 /************************************************************
742  Perform a stat whether a valid fd or not.
743 ************************************************************/
744
745 int fsp_stat(files_struct *fsp, SMB_STRUCT_STAT *pst)
746 {
747         if (fsp->fd == -1)
748                 return vfs_stat(fsp->conn, fsp->fsp_name, pst);
749         else
750                 return vfs_fstat(fsp,fsp->fd, pst);
751 }