tsocket/bsd: fix bug #7115 FreeBSD includes the UDP header in FIONREAD
[abartlet/samba.git/.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25
26 #undef DBGC_CLASS
27 #define DBGC_CLASS DBGC_LOCKING
28
/*
 * Handle of the in-memory database used by this file. It is created by
 * posix_locking_init() and released by posix_locking_end(); it stays NULL
 * until the former has succeeded.
 */

static struct db_context *posix_pending_close_db;
34
35 /****************************************************************************
36  First - the functions that deal with the underlying system locks - these
37  functions are used no matter if we're mapping CIFS Windows locks or CIFS
38  POSIX locks onto POSIX.
39 ****************************************************************************/
40
41 /****************************************************************************
42  Utility function to map a lock type correctly depending on the open
43  mode of a file.
44 ****************************************************************************/
45
46 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
47 {
48         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
49                 /*
50                  * Many UNIX's cannot get a write lock on a file opened read-only.
51                  * Win32 locking semantics allow this.
52                  * Do the best we can and attempt a read-only lock.
53                  */
54                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
55                 return F_RDLCK;
56         }
57
58         /*
59          * This return should be the most normal, as we attempt
60          * to always open files read/write.
61          */
62
63         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
64 }
65
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type. F_RDLCK yields
 "READ"; every other value yields "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK) {
                return "READ";
        }

        return "WRITE";
}
74
75 /****************************************************************************
76  Check to see if the given unsigned lock range is within the possible POSIX
77  range. Modifies the given args to be in range if possible, just returns
78  False if not.
79 ****************************************************************************/
80
81 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
82                                 uint64_t u_offset, uint64_t u_count)
83 {
84         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
85         SMB_OFF_T count = (SMB_OFF_T)u_count;
86
87         /*
88          * For the type of system we are, attempt to
89          * find the maximum positive lock offset as an SMB_OFF_T.
90          */
91
92 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
93
94         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
95
96 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
97
98         /*
99          * In this case SMB_OFF_T is 64 bits,
100          * and the underlying system can handle 64 bit signed locks.
101          */
102
103         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
104         SMB_OFF_T mask = (mask2<<1);
105         SMB_OFF_T max_positive_lock_offset = ~mask;
106
107 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
108
109         /*
110          * In this case either SMB_OFF_T is 32 bits,
111          * or the underlying system cannot handle 64 bit signed locks.
112          * All offsets & counts must be 2^31 or less.
113          */
114
115         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
116
117 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
118
119         /*
120          * POSIX locks of length zero mean lock to end-of-file.
121          * Win32 locks of length zero are point probes. Ignore
122          * any Win32 locks of length zero. JRA.
123          */
124
125         if (count == (SMB_OFF_T)0) {
126                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
127                 return False;
128         }
129
130         /*
131          * If the given offset was > max_positive_lock_offset then we cannot map this at all
132          * ignore this lock.
133          */
134
135         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
136                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
137                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
138                 return False;
139         }
140
141         /*
142          * We must truncate the count to less than max_positive_lock_offset.
143          */
144
145         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
146                 count = max_positive_lock_offset;
147         }
148
149         /*
150          * Truncate count to end at max lock offset.
151          */
152
153         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
154                 count = max_positive_lock_offset - offset;
155         }
156
157         /*
158          * If we ate all the count, ignore this lock.
159          */
160
161         if (count == 0) {
162                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
163                                 (double)u_offset, (double)u_count ));
164                 return False;
165         }
166
167         /*
168          * The mapping was successful.
169          */
170
171         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
172                         (double)offset, (double)count ));
173
174         *offset_out = offset;
175         *count_out = count;
176         
177         return True;
178 }
179
180 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
181                        struct files_struct *fsp, int op, SMB_OFF_T offset,
182                        SMB_OFF_T count, int type)
183 {
184         VFS_FIND(lock);
185         return handle->fns->lock(handle, fsp, op, offset, count, type);
186 }
187
188 /****************************************************************************
189  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
190  broken NFS implementations.
191 ****************************************************************************/
192
193 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
194 {
195         bool ret;
196
197         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
198
199         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
200
201         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
202
203                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
204                                         (double)offset,(double)count));
205                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
206                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
207
208                 /*
209                  * If the offset is > 0x7FFFFFFF then this will cause problems on
210                  * 32 bit NFS mounted filesystems. Just ignore it.
211                  */
212
213                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
214                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
215                         return True;
216                 }
217
218                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
219                         /* 32 bit NFS file system, retry with smaller offset */
220                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
221                         errno = 0;
222                         count &= 0x7fffffff;
223                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
224                 }
225         }
226
227         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
228         return ret;
229 }
230
231 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
232                           struct files_struct *fsp, SMB_OFF_T *poffset,
233                           SMB_OFF_T *pcount, int *ptype, pid_t *ppid)
234 {
235         VFS_FIND(getlock);
236         return handle->fns->getlock(handle, fsp, poffset, pcount, ptype, ppid);
237 }
238
239 /****************************************************************************
240  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
241  broken NFS implementations.
242 ****************************************************************************/
243
244 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
245 {
246         pid_t pid;
247         bool ret;
248
249         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
250                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
251
252         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
253
254         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
255
256                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
257                                         (double)*poffset,(double)*pcount));
258                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
259                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
260
261                 /*
262                  * If the offset is > 0x7FFFFFFF then this will cause problems on
263                  * 32 bit NFS mounted filesystems. Just ignore it.
264                  */
265
266                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
267                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
268                         return True;
269                 }
270
271                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
272                         /* 32 bit NFS file system, retry with smaller offset */
273                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
274                         errno = 0;
275                         *pcount &= 0x7fffffff;
276                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
277                 }
278         }
279
280         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
281         return ret;
282 }
283
284 /****************************************************************************
285  POSIX function to see if a file region is locked. Returns True if the
286  region is locked, False otherwise.
287 ****************************************************************************/
288
289 bool is_posix_locked(files_struct *fsp,
290                         uint64_t *pu_offset,
291                         uint64_t *pu_count,
292                         enum brl_type *plock_type,
293                         enum brl_flavour lock_flav)
294 {
295         SMB_OFF_T offset;
296         SMB_OFF_T count;
297         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
298
299         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
300                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
301                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
302
303         /*
304          * If the requested lock won't fit in the POSIX range, we will
305          * never set it, so presume it is not locked.
306          */
307
308         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
309                 return False;
310         }
311
312         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
313                 return False;
314         }
315
316         if (posix_lock_type == F_UNLCK) {
317                 return False;
318         }
319
320         if (lock_flav == POSIX_LOCK) {
321                 /* Only POSIX lock queries need to know the details. */
322                 *pu_offset = (uint64_t)offset;
323                 *pu_count = (uint64_t)count;
324                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
325         }
326         return True;
327 }
328
329 /****************************************************************************
330  Next - the functions that deal with in memory database storing representations
331  of either Windows CIFS locks or POSIX CIFS locks.
332 ****************************************************************************/
333
334 /* The key used in the in-memory POSIX databases. */
335
336 struct lock_ref_count_key {
337         struct file_id id;
338         char r;
339 }; 
340
341 /*******************************************************************
342  Form a static locking key for a dev/inode pair for the lock ref count
343 ******************************************************************/
344
345 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
346                                           struct lock_ref_count_key *tmp)
347 {
348         ZERO_STRUCTP(tmp);
349         tmp->id = fsp->file_id;
350         tmp->r = 'r';
351         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
352 }
353
354 /*******************************************************************
355  Convenience function to get an fd_array key from an fsp.
356 ******************************************************************/
357
358 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
359 {
360         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
361 }
362
363 /*******************************************************************
364  Create the in-memory POSIX lock databases.
365 ********************************************************************/
366
367 bool posix_locking_init(bool read_only)
368 {
369         if (posix_pending_close_db != NULL) {
370                 return true;
371         }
372
373         posix_pending_close_db = db_open_rbt(NULL);
374
375         if (posix_pending_close_db == NULL) {
376                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
377                 return false;
378         }
379
380         return true;
381 }
382
383 /*******************************************************************
384  Delete the in-memory POSIX lock databases.
385 ********************************************************************/
386
387 bool posix_locking_end(void)
388 {
389         /*
390          * Shouldn't we close all fd's here?
391          */
392         TALLOC_FREE(posix_pending_close_db);
393         return true;
394 }
395
396 /****************************************************************************
397  Next - the functions that deal with storing fd's that have outstanding
398  POSIX locks when closed.
399 ****************************************************************************/
400
401 /****************************************************************************
402  The records in posix_pending_close_db are composed of an array of ints
403  keyed by dev/ino pair.
404  The first int is a reference count of the number of outstanding locks on
405  all open fd's on this dev/ino pair. Any subsequent ints are the fd's that
406  were open on this dev/ino pair that should have been closed, but can't as
407  the lock ref count is non zero.
408 ****************************************************************************/
409
410 /****************************************************************************
411  Keep a reference count of the number of Windows locks open on this dev/ino
412  pair. Creates entry if it doesn't exist.
413 ****************************************************************************/
414
415 static void increment_windows_lock_ref_count(files_struct *fsp)
416 {
417         struct lock_ref_count_key tmp;
418         struct db_record *rec;
419         int lock_ref_count = 0;
420         NTSTATUS status;
421
422         rec = posix_pending_close_db->fetch_locked(
423                 posix_pending_close_db, talloc_tos(),
424                 locking_ref_count_key_fsp(fsp, &tmp));
425
426         SMB_ASSERT(rec != NULL);
427
428         if (rec->value.dptr != NULL) {
429                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
430                 memcpy(&lock_ref_count, rec->value.dptr,
431                        sizeof(lock_ref_count));
432         }
433
434         lock_ref_count++;
435
436         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
437                                                sizeof(lock_ref_count)), 0);
438
439         SMB_ASSERT(NT_STATUS_IS_OK(status));
440
441         TALLOC_FREE(rec);
442
443         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
444                   fsp_str_dbg(fsp), lock_ref_count));
445 }
446
447 /****************************************************************************
448  Bulk delete - subtract as many locks as we've just deleted.
449 ****************************************************************************/
450
451 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
452 {
453         struct lock_ref_count_key tmp;
454         struct db_record *rec;
455         int lock_ref_count = 0;
456         NTSTATUS status;
457
458         rec = posix_pending_close_db->fetch_locked(
459                 posix_pending_close_db, talloc_tos(),
460                 locking_ref_count_key_fsp(fsp, &tmp));
461
462         SMB_ASSERT((rec != NULL)
463                    && (rec->value.dptr != NULL)
464                    && (rec->value.dsize == sizeof(lock_ref_count)));
465
466         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
467
468         SMB_ASSERT(lock_ref_count > 0);
469
470         lock_ref_count -= dcount;
471
472         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
473                                                sizeof(lock_ref_count)), 0);
474
475         SMB_ASSERT(NT_STATUS_IS_OK(status));
476
477         TALLOC_FREE(rec);
478
479         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
480                   fsp_str_dbg(fsp), lock_ref_count));
481 }
482
483 static void decrement_windows_lock_ref_count(files_struct *fsp)
484 {
485         reduce_windows_lock_ref_count(fsp, 1);
486 }
487
488 /****************************************************************************
489  Fetch the lock ref count.
490 ****************************************************************************/
491
492 static int get_windows_lock_ref_count(files_struct *fsp)
493 {
494         struct lock_ref_count_key tmp;
495         TDB_DATA dbuf;
496         int res;
497         int lock_ref_count = 0;
498
499         res = posix_pending_close_db->fetch(
500                 posix_pending_close_db, talloc_tos(),
501                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
502
503         SMB_ASSERT(res == 0);
504
505         if (dbuf.dsize != 0) {
506                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
507                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
508                 TALLOC_FREE(dbuf.dptr);
509         }
510
511         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
512                   fsp_str_dbg(fsp), lock_ref_count));
513
514         return lock_ref_count;
515 }
516
517 /****************************************************************************
518  Delete a lock_ref_count entry.
519 ****************************************************************************/
520
521 static void delete_windows_lock_ref_count(files_struct *fsp)
522 {
523         struct lock_ref_count_key tmp;
524         struct db_record *rec;
525
526         rec = posix_pending_close_db->fetch_locked(
527                 posix_pending_close_db, talloc_tos(),
528                 locking_ref_count_key_fsp(fsp, &tmp));
529
530         SMB_ASSERT(rec != NULL);
531
532         /* Not a bug if it doesn't exist - no locks were ever granted. */
533
534         rec->delete_rec(rec);
535         TALLOC_FREE(rec);
536
537         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
538                   fsp_str_dbg(fsp)));
539 }
540
541 /****************************************************************************
542  Add an fd to the pending close tdb.
543 ****************************************************************************/
544
545 static void add_fd_to_close_entry(files_struct *fsp)
546 {
547         struct db_record *rec;
548         uint8_t *new_data;
549         NTSTATUS status;
550
551         rec = posix_pending_close_db->fetch_locked(
552                 posix_pending_close_db, talloc_tos(),
553                 fd_array_key_fsp(fsp));
554
555         SMB_ASSERT(rec != NULL);
556
557         new_data = TALLOC_ARRAY(
558                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
559
560         SMB_ASSERT(new_data != NULL);
561
562         memcpy(new_data, rec->value.dptr, rec->value.dsize);
563         memcpy(new_data + rec->value.dsize,
564                &fsp->fh->fd, sizeof(fsp->fh->fd));
565
566         status = rec->store(
567                 rec, make_tdb_data(new_data,
568                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
569
570         SMB_ASSERT(NT_STATUS_IS_OK(status));
571
572         TALLOC_FREE(rec);
573
574         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
575                   fsp->fh->fd, fsp_str_dbg(fsp)));
576 }
577
578 /****************************************************************************
579  Remove all fd entries for a specific dev/inode pair from the tdb.
580 ****************************************************************************/
581
582 static void delete_close_entries(files_struct *fsp)
583 {
584         struct db_record *rec;
585
586         rec = posix_pending_close_db->fetch_locked(
587                 posix_pending_close_db, talloc_tos(),
588                 fd_array_key_fsp(fsp));
589
590         SMB_ASSERT(rec != NULL);
591         rec->delete_rec(rec);
592         TALLOC_FREE(rec);
593 }
594
595 /****************************************************************************
596  Get the array of POSIX pending close records for an open fsp. Returns number
597  of entries.
598 ****************************************************************************/
599
600 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
601                                               files_struct *fsp, int **entries)
602 {
603         TDB_DATA dbuf;
604         int res;
605
606         res = posix_pending_close_db->fetch(
607                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
608                 &dbuf);
609
610         SMB_ASSERT(res == 0);
611
612         if (dbuf.dsize == 0) {
613                 *entries = NULL;
614                 return 0;
615         }
616
617         *entries = (int *)dbuf.dptr;
618         return (size_t)(dbuf.dsize / sizeof(int));
619 }
620
621 /****************************************************************************
622  Deal with pending closes needed by POSIX locking support.
623  Note that posix_locking_close_file() is expected to have been called
624  to delete all locks on this fsp before this function is called.
625 ****************************************************************************/
626
627 int fd_close_posix(struct files_struct *fsp)
628 {
629         int saved_errno = 0;
630         int ret;
631         int *fd_array = NULL;
632         size_t count, i;
633
634         if (!lp_locking(fsp->conn->params) ||
635             !lp_posix_locking(fsp->conn->params))
636         {
637                 /*
638                  * No locking or POSIX to worry about or we want POSIX semantics
639                  * which will lose all locks on all fd's open on this dev/inode,
640                  * just close.
641                  */
642                 return close(fsp->fh->fd);
643         }
644
645         if (get_windows_lock_ref_count(fsp)) {
646
647                 /*
648                  * There are outstanding locks on this dev/inode pair on
649                  * other fds. Add our fd to the pending close tdb and set
650                  * fsp->fh->fd to -1.
651                  */
652
653                 add_fd_to_close_entry(fsp);
654                 return 0;
655         }
656
657         /*
658          * No outstanding locks. Get the pending close fd's
659          * from the tdb and close them all.
660          */
661
662         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
663
664         if (count) {
665                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
666                           (unsigned int)count));
667
668                 for(i = 0; i < count; i++) {
669                         if (close(fd_array[i]) == -1) {
670                                 saved_errno = errno;
671                         }
672                 }
673
674                 /*
675                  * Delete all fd's stored in the tdb
676                  * for this dev/inode pair.
677                  */
678
679                 delete_close_entries(fsp);
680         }
681
682         TALLOC_FREE(fd_array);
683
684         /* Don't need a lock ref count on this dev/ino anymore. */
685         delete_windows_lock_ref_count(fsp);
686
687         /*
688          * Finally close the fd associated with this fsp.
689          */
690
691         ret = close(fsp->fh->fd);
692
693         if (ret == 0 && saved_errno != 0) {
694                 errno = saved_errno;
695                 ret = -1;
696         }
697
698         return ret;
699 }
700
701 /****************************************************************************
702  Next - the functions that deal with the mapping CIFS Windows locks onto
703  the underlying system POSIX locks.
704 ****************************************************************************/
705
706 /*
707  * Structure used when splitting a lock range
708  * into a POSIX lock range. Doubly linked list.
709  */
710
711 struct lock_list {
712         struct lock_list *next;
713         struct lock_list *prev;
714         SMB_OFF_T start;
715         SMB_OFF_T size;
716 };
717
718 /****************************************************************************
719  Create a list of lock ranges that don't overlap a given range. Used in calculating
720  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
721  understand it :-).
722 ****************************************************************************/
723
724 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
725                                                 struct lock_list *lhead,
726                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
727                                                 files_struct *fsp,
728                                                 const struct lock_struct *plocks,
729                                                 int num_locks)
730 {
731         int i;
732
733         /*
734          * Check the current lock list on this dev/inode pair.
735          * Quit if the list is deleted.
736          */
737
738         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
739                 (double)lhead->start, (double)lhead->size ));
740
741         for (i=0; i<num_locks && lhead; i++) {
742                 const struct lock_struct *lock = &plocks[i];
743                 struct lock_list *l_curr;
744
745                 /* Ignore all but read/write locks. */
746                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
747                         continue;
748                 }
749
750                 /* Ignore locks not owned by this process. */
751                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
752                         continue;
753                 }
754
755                 /*
756                  * Walk the lock list, checking for overlaps. Note that
757                  * the lock list can expand within this loop if the current
758                  * range being examined needs to be split.
759                  */
760
761                 for (l_curr = lhead; l_curr;) {
762
763                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
764                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
765
766                         if ( (l_curr->start >= (lock->start + lock->size)) ||
767                                  (lock->start >= (l_curr->start + l_curr->size))) {
768
769                                 /* No overlap with existing lock - leave this range alone. */
770 /*********************************************
771                                              +---------+
772                                              | l_curr  |
773                                              +---------+
774                                 +-------+
775                                 | lock  |
776                                 +-------+
777 OR....
778              +---------+
779              |  l_curr |
780              +---------+
781 **********************************************/
782
783                                 DEBUG(10,(" no overlap case.\n" ));
784
785                                 l_curr = l_curr->next;
786
787                         } else if ( (l_curr->start >= lock->start) &&
788                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
789
790                                 /*
791                                  * This range is completely overlapped by this existing lock range
792                                  * and thus should have no effect. Delete it from the list.
793                                  */
794 /*********************************************
795                 +---------+
796                 |  l_curr |
797                 +---------+
798         +---------------------------+
799         |       lock                |
800         +---------------------------+
801 **********************************************/
802                                 /* Save the next pointer */
803                                 struct lock_list *ul_next = l_curr->next;
804
805                                 DEBUG(10,(" delete case.\n" ));
806
807                                 DLIST_REMOVE(lhead, l_curr);
808                                 if(lhead == NULL) {
809                                         break; /* No more list... */
810                                 }
811
812                                 l_curr = ul_next;
813                                 
814                         } else if ( (l_curr->start >= lock->start) &&
815                                                 (l_curr->start < lock->start + lock->size) &&
816                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
817
818                                 /*
819                                  * This range overlaps the existing lock range at the high end.
820                                  * Truncate by moving start to existing range end and reducing size.
821                                  */
822 /*********************************************
823                 +---------------+
824                 |  l_curr       |
825                 +---------------+
826         +---------------+
827         |    lock       |
828         +---------------+
829 BECOMES....
830                         +-------+
831                         | l_curr|
832                         +-------+
833 **********************************************/
834
835                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
836                                 l_curr->start = lock->start + lock->size;
837
838                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
839                                                                 (double)l_curr->start, (double)l_curr->size ));
840
841                                 l_curr = l_curr->next;
842
843                         } else if ( (l_curr->start < lock->start) &&
844                                                 (l_curr->start + l_curr->size > lock->start) &&
845                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
846
847                                 /*
848                                  * This range overlaps the existing lock range at the low end.
849                                  * Truncate by reducing size.
850                                  */
851 /*********************************************
852    +---------------+
853    |  l_curr       |
854    +---------------+
855            +---------------+
856            |    lock       |
857            +---------------+
858 BECOMES....
859    +-------+
860    | l_curr|
861    +-------+
862 **********************************************/
863
864                                 l_curr->size = lock->start - l_curr->start;
865
866                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
867                                                                 (double)l_curr->start, (double)l_curr->size ));
868
869                                 l_curr = l_curr->next;
870                 
871                         } else if ( (l_curr->start < lock->start) &&
872                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
873                                 /*
874                                  * Worst case scenario. Range completely overlaps an existing
875                                  * lock range. Split the request into two, push the new (upper) request
876                                  * into the dlink list, and continue with the entry after l_new (as we
877                                  * know that l_new will not overlap with this lock).
878                                  */
879 /*********************************************
880         +---------------------------+
881         |        l_curr             |
882         +---------------------------+
883                 +---------+
884                 | lock    |
885                 +---------+
886 BECOMES.....
887         +-------+         +---------+
888         | l_curr|         | l_new   |
889         +-------+         +---------+
890 **********************************************/
891                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
892
893                                 if(l_new == NULL) {
894                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
895                                         return NULL; /* The talloc_destroy takes care of cleanup. */
896                                 }
897
898                                 ZERO_STRUCTP(l_new);
899                                 l_new->start = lock->start + lock->size;
900                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
901
902                                 /* Truncate the l_curr. */
903                                 l_curr->size = lock->start - l_curr->start;
904
905                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
906 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
907                                                                 (double)l_new->start, (double)l_new->size ));
908
909                                 /*
910                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
911                                  */
912                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
913
914                                 /* And move after the link we added. */
915                                 l_curr = l_new->next;
916
917                         } else {
918
919                                 /*
920                                  * This logic case should never happen. Ensure this is the
921                                  * case by forcing an abort.... Remove in production.
922                                  */
923                                 char *msg = NULL;
924
925                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
926 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
927                                         smb_panic(msg);
928                                 } else {
929                                         smb_panic("posix_lock_list");
930                                 }
931                         }
932                 } /* end for ( l_curr = lhead; l_curr;) */
933         } /* end for (i=0; i<num_locks && ul_head; i++) */
934
935         return lhead;
936 }
937
938 /****************************************************************************
939  POSIX function to acquire a lock. Returns True if the
940  lock could be granted, False if not.
941 ****************************************************************************/
942
943 bool set_posix_lock_windows_flavour(files_struct *fsp,
944                         uint64_t u_offset,
945                         uint64_t u_count,
946                         enum brl_type lock_type,
947                         const struct lock_context *lock_ctx,
948                         const struct lock_struct *plocks,
949                         int num_locks,
950                         int *errno_ret)
951 {
952         SMB_OFF_T offset;
953         SMB_OFF_T count;
954         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
955         bool ret = True;
956         size_t lock_count;
957         TALLOC_CTX *l_ctx = NULL;
958         struct lock_list *llist = NULL;
959         struct lock_list *ll = NULL;
960
961         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
962                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
963                  (double)u_offset, (double)u_count,
964                  posix_lock_type_name(lock_type)));
965
966         /*
967          * If the requested lock won't fit in the POSIX range, we will
968          * pretend it was successful.
969          */
970
971         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
972                 increment_windows_lock_ref_count(fsp);
973                 return True;
974         }
975
976         /*
977          * Windows is very strange. It allows read locks to be overlayed
978          * (even over a write lock), but leaves the write lock in force until the first
979          * unlock. It also reference counts the locks. This means the following sequence :
980          *
981          * process1                                      process2
982          * ------------------------------------------------------------------------
983          * WRITE LOCK : start = 2, len = 10
984          *                                            READ LOCK: start =0, len = 10 - FAIL
985          * READ LOCK : start = 0, len = 14 
986          *                                            READ LOCK: start =0, len = 10 - FAIL
987          * UNLOCK : start = 2, len = 10
988          *                                            READ LOCK: start =0, len = 10 - OK
989          *
990          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
991          * would leave a single read lock over the 0-14 region.
992          */
993         
994         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
995                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
996                 return False;
997         }
998
999         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1000                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1001                 talloc_destroy(l_ctx);
1002                 return False;
1003         }
1004
1005         /*
1006          * Create the initial list entry containing the
1007          * lock we want to add.
1008          */
1009
1010         ZERO_STRUCTP(ll);
1011         ll->start = offset;
1012         ll->size = count;
1013
1014         DLIST_ADD(llist, ll);
1015
1016         /*
1017          * The following call calculates if there are any
1018          * overlapping locks held by this process on
1019          * fd's open on the same file and splits this list
1020          * into a list of lock ranges that do not overlap with existing
1021          * POSIX locks.
1022          */
1023
1024         llist = posix_lock_list(l_ctx,
1025                                 llist,
1026                                 lock_ctx, /* Lock context llist belongs to. */
1027                                 fsp,
1028                                 plocks,
1029                                 num_locks);
1030
1031         /*
1032          * Add the POSIX locks on the list of ranges returned.
1033          * As the lock is supposed to be added atomically, we need to
1034          * back out all the locks if any one of these calls fail.
1035          */
1036
1037         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1038                 offset = ll->start;
1039                 count = ll->size;
1040
1041                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1042                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1043
1044                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1045                         *errno_ret = errno;
1046                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1047                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1048                         ret = False;
1049                         break;
1050                 }
1051         }
1052
1053         if (!ret) {
1054
1055                 /*
1056                  * Back out all the POSIX locks we have on fail.
1057                  */
1058
1059                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1060                         offset = ll->start;
1061                         count = ll->size;
1062
1063                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1064                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1065
1066                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1067                 }
1068         } else {
1069                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1070                 increment_windows_lock_ref_count(fsp);
1071         }
1072
1073         talloc_destroy(l_ctx);
1074         return ret;
1075 }
1076
1077 /****************************************************************************
1078  POSIX function to release a lock. Returns True if the
1079  lock could be released, False if not.
1080 ****************************************************************************/
1081
1082 bool release_posix_lock_windows_flavour(files_struct *fsp,
1083                                 uint64_t u_offset,
1084                                 uint64_t u_count,
1085                                 enum brl_type deleted_lock_type,
1086                                 const struct lock_context *lock_ctx,
1087                                 const struct lock_struct *plocks,
1088                                 int num_locks)
1089 {
1090         SMB_OFF_T offset;
1091         SMB_OFF_T count;
1092         bool ret = True;
1093         TALLOC_CTX *ul_ctx = NULL;
1094         struct lock_list *ulist = NULL;
1095         struct lock_list *ul = NULL;
1096
1097         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1098                  "count = %.0f\n", fsp_str_dbg(fsp),
1099                  (double)u_offset, (double)u_count));
1100
1101         /* Remember the number of Windows locks we have on this dev/ino pair. */
1102         decrement_windows_lock_ref_count(fsp);
1103
1104         /*
1105          * If the requested lock won't fit in the POSIX range, we will
1106          * pretend it was successful.
1107          */
1108
1109         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1110                 return True;
1111         }
1112
1113         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1114                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1115                 return False;
1116         }
1117
1118         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1119                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1120                 talloc_destroy(ul_ctx);
1121                 return False;
1122         }
1123
1124         /*
1125          * Create the initial list entry containing the
1126          * lock we want to remove.
1127          */
1128
1129         ZERO_STRUCTP(ul);
1130         ul->start = offset;
1131         ul->size = count;
1132
1133         DLIST_ADD(ulist, ul);
1134
1135         /*
1136          * The following call calculates if there are any
1137          * overlapping locks held by this process on
1138          * fd's open on the same file and creates a
1139          * list of unlock ranges that will allow
1140          * POSIX lock ranges to remain on the file whilst the
1141          * unlocks are performed.
1142          */
1143
1144         ulist = posix_lock_list(ul_ctx,
1145                                 ulist,
1146                                 lock_ctx, /* Lock context ulist belongs to. */
1147                                 fsp,
1148                                 plocks,
1149                                 num_locks);
1150
1151         /*
1152          * If there were any overlapped entries (list is > 1 or size or start have changed),
1153          * and the lock_type we just deleted from
1154          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1155          * the POSIX lock to a read lock. This allows any overlapping read locks
1156          * to be atomically maintained.
1157          */
1158
1159         if (deleted_lock_type == WRITE_LOCK &&
1160                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1161
1162                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1163                         (double)offset, (double)count ));
1164
1165                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1166                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1167                         talloc_destroy(ul_ctx);
1168                         return False;
1169                 }
1170         }
1171
1172         /*
1173          * Release the POSIX locks on the list of ranges returned.
1174          */
1175
1176         for(; ulist; ulist = ulist->next) {
1177                 offset = ulist->start;
1178                 count = ulist->size;
1179
1180                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1181                         (double)offset, (double)count ));
1182
1183                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1184                         ret = False;
1185                 }
1186         }
1187
1188         talloc_destroy(ul_ctx);
1189         return ret;
1190 }
1191
1192 /****************************************************************************
1193  Next - the functions that deal with mapping CIFS POSIX locks onto
1194  the underlying system POSIX locks.
1195 ****************************************************************************/
1196
1197 /****************************************************************************
1198  POSIX function to acquire a lock. Returns True if the
1199  lock could be granted, False if not.
1200  As POSIX locks don't stack or conflict (they just overwrite)
1201  we can map the requested lock directly onto a system one. We
1202  know it doesn't conflict with locks on other contexts as the
1203  upper layer would have refused it.
1204 ****************************************************************************/
1205
1206 bool set_posix_lock_posix_flavour(files_struct *fsp,
1207                         uint64_t u_offset,
1208                         uint64_t u_count,
1209                         enum brl_type lock_type,
1210                         int *errno_ret)
1211 {
1212         SMB_OFF_T offset;
1213         SMB_OFF_T count;
1214         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1215
1216         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1217                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1218                  (double)u_offset, (double)u_count,
1219                  posix_lock_type_name(lock_type)));
1220
1221         /*
1222          * If the requested lock won't fit in the POSIX range, we will
1223          * pretend it was successful.
1224          */
1225
1226         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1227                 return True;
1228         }
1229
1230         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1231                 *errno_ret = errno;
1232                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1233                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1234                 return False;
1235         }
1236         return True;
1237 }
1238
1239 /****************************************************************************
1240  POSIX function to release a lock. Returns True if the
1241  lock could be released, False if not.
1242  We are given a complete lock state from the upper layer which is what the lock
1243  state should be after the unlock has already been done, so what
1244  we do is punch out holes in the unlock range where locks owned by this process
1245  have a different lock context.
1246 ****************************************************************************/
1247
1248 bool release_posix_lock_posix_flavour(files_struct *fsp,
1249                                 uint64_t u_offset,
1250                                 uint64_t u_count,
1251                                 const struct lock_context *lock_ctx,
1252                                 const struct lock_struct *plocks,
1253                                 int num_locks)
1254 {
1255         bool ret = True;
1256         SMB_OFF_T offset;
1257         SMB_OFF_T count;
1258         TALLOC_CTX *ul_ctx = NULL;
1259         struct lock_list *ulist = NULL;
1260         struct lock_list *ul = NULL;
1261
1262         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1263                  "count = %.0f\n", fsp_str_dbg(fsp),
1264                  (double)u_offset, (double)u_count));
1265
1266         /*
1267          * If the requested lock won't fit in the POSIX range, we will
1268          * pretend it was successful.
1269          */
1270
1271         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1272                 return True;
1273         }
1274
1275         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1276                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1277                 return False;
1278         }
1279
1280         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1281                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1282                 talloc_destroy(ul_ctx);
1283                 return False;
1284         }
1285
1286         /*
1287          * Create the initial list entry containing the
1288          * lock we want to remove.
1289          */
1290
1291         ZERO_STRUCTP(ul);
1292         ul->start = offset;
1293         ul->size = count;
1294
1295         DLIST_ADD(ulist, ul);
1296
1297         /*
1298          * Walk the given array creating a linked list
1299          * of unlock requests.
1300          */
1301
1302         ulist = posix_lock_list(ul_ctx,
1303                                 ulist,
1304                                 lock_ctx, /* Lock context ulist belongs to. */
1305                                 fsp,
1306                                 plocks,
1307                                 num_locks);
1308
1309         /*
1310          * Release the POSIX locks on the list of ranges returned.
1311          */
1312
1313         for(; ulist; ulist = ulist->next) {
1314                 offset = ulist->start;
1315                 count = ulist->size;
1316
1317                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1318                         (double)offset, (double)count ));
1319
1320                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1321                         ret = False;
1322                 }
1323         }
1324
1325         talloc_destroy(ul_ctx);
1326         return ret;
1327 }