Merge branch 'master' of ssh://git.samba.org/data/git/samba
[nivanova/samba-autobuild/.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25
26 #undef DBGC_CLASS
27 #define DBGC_CLASS DBGC_LOCKING
28
29 /*
30  * The pending close database handle: opened by posix_locking_init(), freed by posix_locking_end().
31  */
32
33 static struct db_context *posix_pending_close_db;
34
35 /****************************************************************************
36  First - the functions that deal with the underlying system locks - these
37  functions are used no matter if we're mapping CIFS Windows locks or CIFS
38  POSIX locks onto POSIX.
39 ****************************************************************************/
40
41 /****************************************************************************
42  Utility function to map a lock type correctly depending on the open
43  mode of a file.
44 ****************************************************************************/
45
46 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
47 {
48         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
49                 /*
50                  * Many UNIX's cannot get a write lock on a file opened read-only.
51                  * Win32 locking semantics allow this.
52                  * Do the best we can and attempt a read-only lock.
53                  */
54                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
55                 return F_RDLCK;
56         }
57
58         /*
59          * This return should be the most normal, as we attempt
60          * to always open files read/write.
61          */
62
63         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
64 }
65
/****************************************************************************
 Debugging aid - printable name for an fcntl() lock type. Anything that
 is not F_RDLCK is reported as "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
74
75 /****************************************************************************
76  Check to see if the given unsigned lock range is within the possible POSIX
77  range. Modifies the given args to be in range if possible, just returns
78  False if not.
79 ****************************************************************************/
80
81 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
82                                 uint64_t u_offset, uint64_t u_count)
83 {
84         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
85         SMB_OFF_T count = (SMB_OFF_T)u_count;
86
87         /*
88          * For the type of system we are, attempt to
89          * find the maximum positive lock offset as an SMB_OFF_T.
90          */
91
92 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
93
94         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
95
96 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
97
98         /*
99          * In this case SMB_OFF_T is 64 bits,
100          * and the underlying system can handle 64 bit signed locks.
101          */
102
103         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
104         SMB_OFF_T mask = (mask2<<1);
105         SMB_OFF_T max_positive_lock_offset = ~mask;
106
107 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
108
109         /*
110          * In this case either SMB_OFF_T is 32 bits,
111          * or the underlying system cannot handle 64 bit signed locks.
112          * All offsets & counts must be 2^31 or less.
113          */
114
115         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
116
117 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
118
119         /*
120          * POSIX locks of length zero mean lock to end-of-file.
121          * Win32 locks of length zero are point probes. Ignore
122          * any Win32 locks of length zero. JRA.
123          */
124
125         if (count == (SMB_OFF_T)0) {
126                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
127                 return False;
128         }
129
130         /*
131          * If the given offset was > max_positive_lock_offset then we cannot map this at all
132          * ignore this lock.
133          */
134
135         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
136                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
137                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
138                 return False;
139         }
140
141         /*
142          * We must truncate the count to less than max_positive_lock_offset.
143          */
144
145         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
146                 count = max_positive_lock_offset;
147         }
148
149         /*
150          * Truncate count to end at max lock offset.
151          */
152
153         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
154                 count = max_positive_lock_offset - offset;
155         }
156
157         /*
158          * If we ate all the count, ignore this lock.
159          */
160
161         if (count == 0) {
162                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
163                                 (double)u_offset, (double)u_count ));
164                 return False;
165         }
166
167         /*
168          * The mapping was successful.
169          */
170
171         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
172                         (double)offset, (double)count ));
173
174         *offset_out = offset;
175         *count_out = count;
176         
177         return True;
178 }
179
180 /****************************************************************************
181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182  broken NFS implementations.
183 ****************************************************************************/
184
185 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
186 {
187         bool ret;
188
189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
190
191         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
192
193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
194
195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196                                         (double)offset,(double)count));
197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
199
200                 /*
201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
202                  * 32 bit NFS mounted filesystems. Just ignore it.
203                  */
204
205                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207                         return True;
208                 }
209
210                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
211                         /* 32 bit NFS file system, retry with smaller offset */
212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213                         errno = 0;
214                         count &= 0x7fffffff;
215                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
216                 }
217         }
218
219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220         return ret;
221 }
222
223 /****************************************************************************
224  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
225  broken NFS implementations.
226 ****************************************************************************/
227
228 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
229 {
230         pid_t pid;
231         bool ret;
232
233         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
234                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
235
236         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
237
238         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
239
240                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
241                                         (double)*poffset,(double)*pcount));
242                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
243                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
244
245                 /*
246                  * If the offset is > 0x7FFFFFFF then this will cause problems on
247                  * 32 bit NFS mounted filesystems. Just ignore it.
248                  */
249
250                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
251                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
252                         return True;
253                 }
254
255                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
256                         /* 32 bit NFS file system, retry with smaller offset */
257                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
258                         errno = 0;
259                         *pcount &= 0x7fffffff;
260                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
261                 }
262         }
263
264         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
265         return ret;
266 }
267
268 /****************************************************************************
269  POSIX function to see if a file region is locked. Returns True if the
270  region is locked, False otherwise.
271 ****************************************************************************/
272
273 bool is_posix_locked(files_struct *fsp,
274                         uint64_t *pu_offset,
275                         uint64_t *pu_count,
276                         enum brl_type *plock_type,
277                         enum brl_flavour lock_flav)
278 {
279         SMB_OFF_T offset;
280         SMB_OFF_T count;
281         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
282
283         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
284                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
285
286         /*
287          * If the requested lock won't fit in the POSIX range, we will
288          * never set it, so presume it is not locked.
289          */
290
291         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
292                 return False;
293         }
294
295         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
296                 return False;
297         }
298
299         if (posix_lock_type == F_UNLCK) {
300                 return False;
301         }
302
303         if (lock_flav == POSIX_LOCK) {
304                 /* Only POSIX lock queries need to know the details. */
305                 *pu_offset = (uint64_t)offset;
306                 *pu_count = (uint64_t)count;
307                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
308         }
309         return True;
310 }
311
312 /****************************************************************************
313  Next - the functions that deal with in memory database storing representations
314  of either Windows CIFS locks or POSIX CIFS locks.
315 ****************************************************************************/
316
317 /* The key used in the in-memory POSIX databases. */
318
319 struct lock_ref_count_key {
320         struct file_id id;
321         char r;
322 }; 
323
324 /*******************************************************************
325  Form a static locking key for a dev/inode pair for the lock ref count
326 ******************************************************************/
327
328 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
329                                           struct lock_ref_count_key *tmp)
330 {
331         ZERO_STRUCTP(tmp);
332         tmp->id = fsp->file_id;
333         tmp->r = 'r';
334         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
335 }
336
337 /*******************************************************************
338  Convenience function to get an fd_array key from an fsp.
339 ******************************************************************/
340
341 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
342 {
343         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
344 }
345
346 /*******************************************************************
347  Create the in-memory POSIX lock databases.
348 ********************************************************************/
349
350 bool posix_locking_init(bool read_only)
351 {
352         if (posix_pending_close_db != NULL) {
353                 return true;
354         }
355
356         posix_pending_close_db = db_open_rbt(NULL);
357
358         if (posix_pending_close_db == NULL) {
359                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
360                 return false;
361         }
362
363         return true;
364 }
365
366 /*******************************************************************
367  Delete the in-memory POSIX lock databases.
368 ********************************************************************/
369
370 bool posix_locking_end(void)
371 {
372         /*
373          * Shouldn't we close all fd's here?
374          */
375         TALLOC_FREE(posix_pending_close_db);
376         return true;
377 }
378
379 /****************************************************************************
380  Next - the functions that deal with storing fd's that have outstanding
381  POSIX locks when closed.
382 ****************************************************************************/
383
/****************************************************************************
 The pending close database holds two kinds of record for each dev/ino pair:

 - A lock reference count record, keyed by a struct lock_ref_count_key.
   Its value is a single int: the number of outstanding locks on all open
   fd's on this dev/ino pair.

 - A pending close record, keyed by the bare file id. Its value is an array
   of ints: the fd's that were open on this dev/ino pair that should have
   been closed, but can't as the lock ref count is non zero.
****************************************************************************/
392
393 /****************************************************************************
394  Keep a reference count of the number of Windows locks open on this dev/ino
395  pair. Creates entry if it doesn't exist.
396 ****************************************************************************/
397
398 static void increment_windows_lock_ref_count(files_struct *fsp)
399 {
400         struct lock_ref_count_key tmp;
401         struct db_record *rec;
402         int lock_ref_count = 0;
403         NTSTATUS status;
404
405         rec = posix_pending_close_db->fetch_locked(
406                 posix_pending_close_db, talloc_tos(),
407                 locking_ref_count_key_fsp(fsp, &tmp));
408
409         SMB_ASSERT(rec != NULL);
410
411         if (rec->value.dptr != NULL) {
412                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
413                 memcpy(&lock_ref_count, rec->value.dptr,
414                        sizeof(lock_ref_count));
415         }
416
417         lock_ref_count++;
418
419         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
420                                                sizeof(lock_ref_count)), 0);
421
422         SMB_ASSERT(NT_STATUS_IS_OK(status));
423
424         TALLOC_FREE(rec);
425
426         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
427                   fsp->fsp_name, lock_ref_count ));
428 }
429
430 /****************************************************************************
431  Bulk delete - subtract as many locks as we've just deleted.
432 ****************************************************************************/
433
434 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
435 {
436         struct lock_ref_count_key tmp;
437         struct db_record *rec;
438         int lock_ref_count = 0;
439         NTSTATUS status;
440
441         rec = posix_pending_close_db->fetch_locked(
442                 posix_pending_close_db, talloc_tos(),
443                 locking_ref_count_key_fsp(fsp, &tmp));
444
445         SMB_ASSERT((rec != NULL)
446                    && (rec->value.dptr != NULL)
447                    && (rec->value.dsize == sizeof(lock_ref_count)));
448
449         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
450
451         SMB_ASSERT(lock_ref_count > 0);
452
453         lock_ref_count -= dcount;
454
455         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
456                                                sizeof(lock_ref_count)), 0);
457
458         SMB_ASSERT(NT_STATUS_IS_OK(status));
459
460         TALLOC_FREE(rec);
461
462         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
463                   fsp->fsp_name, lock_ref_count ));
464 }
465
466 static void decrement_windows_lock_ref_count(files_struct *fsp)
467 {
468         reduce_windows_lock_ref_count(fsp, 1);
469 }
470
471 /****************************************************************************
472  Fetch the lock ref count.
473 ****************************************************************************/
474
475 static int get_windows_lock_ref_count(files_struct *fsp)
476 {
477         struct lock_ref_count_key tmp;
478         TDB_DATA dbuf;
479         int res;
480         int lock_ref_count = 0;
481
482         res = posix_pending_close_db->fetch(
483                 posix_pending_close_db, talloc_tos(),
484                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
485
486         SMB_ASSERT(res == 0);
487
488         if (dbuf.dsize != 0) {
489                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
490                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
491                 TALLOC_FREE(dbuf.dptr);
492         }
493
494         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
495                   fsp->fsp_name, lock_ref_count ));
496
497         return lock_ref_count;
498 }
499
500 /****************************************************************************
501  Delete a lock_ref_count entry.
502 ****************************************************************************/
503
504 static void delete_windows_lock_ref_count(files_struct *fsp)
505 {
506         struct lock_ref_count_key tmp;
507         struct db_record *rec;
508
509         rec = posix_pending_close_db->fetch_locked(
510                 posix_pending_close_db, talloc_tos(),
511                 locking_ref_count_key_fsp(fsp, &tmp));
512
513         SMB_ASSERT(rec != NULL);
514
515         /* Not a bug if it doesn't exist - no locks were ever granted. */
516
517         rec->delete_rec(rec);
518         TALLOC_FREE(rec);
519
520         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
521                   fsp->fsp_name));
522 }
523
524 /****************************************************************************
525  Add an fd to the pending close tdb.
526 ****************************************************************************/
527
528 static void add_fd_to_close_entry(files_struct *fsp)
529 {
530         struct db_record *rec;
531         uint8_t *new_data;
532         NTSTATUS status;
533
534         rec = posix_pending_close_db->fetch_locked(
535                 posix_pending_close_db, talloc_tos(),
536                 fd_array_key_fsp(fsp));
537
538         SMB_ASSERT(rec != NULL);
539
540         new_data = TALLOC_ARRAY(
541                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
542
543         SMB_ASSERT(new_data != NULL);
544
545         memcpy(new_data, rec->value.dptr, rec->value.dsize);
546         memcpy(new_data + rec->value.dsize,
547                &fsp->fh->fd, sizeof(fsp->fh->fd));
548
549         status = rec->store(
550                 rec, make_tdb_data(new_data,
551                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
552
553         SMB_ASSERT(NT_STATUS_IS_OK(status));
554
555         TALLOC_FREE(rec);
556
557         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
558                   fsp->fh->fd, fsp->fsp_name ));
559 }
560
561 /****************************************************************************
562  Remove all fd entries for a specific dev/inode pair from the tdb.
563 ****************************************************************************/
564
565 static void delete_close_entries(files_struct *fsp)
566 {
567         struct db_record *rec;
568
569         rec = posix_pending_close_db->fetch_locked(
570                 posix_pending_close_db, talloc_tos(),
571                 fd_array_key_fsp(fsp));
572
573         SMB_ASSERT(rec != NULL);
574         rec->delete_rec(rec);
575         TALLOC_FREE(rec);
576 }
577
578 /****************************************************************************
579  Get the array of POSIX pending close records for an open fsp. Returns number
580  of entries.
581 ****************************************************************************/
582
583 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
584                                               files_struct *fsp, int **entries)
585 {
586         TDB_DATA dbuf;
587         int res;
588
589         res = posix_pending_close_db->fetch(
590                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
591                 &dbuf);
592
593         SMB_ASSERT(res == 0);
594
595         if (dbuf.dsize == 0) {
596                 *entries = NULL;
597                 return 0;
598         }
599
600         *entries = (int *)dbuf.dptr;
601         return (size_t)(dbuf.dsize / sizeof(int));
602 }
603
604 /****************************************************************************
605  Deal with pending closes needed by POSIX locking support.
606  Note that posix_locking_close_file() is expected to have been called
607  to delete all locks on this fsp before this function is called.
608 ****************************************************************************/
609
610 int fd_close_posix(struct files_struct *fsp)
611 {
612         int saved_errno = 0;
613         int ret;
614         int *fd_array = NULL;
615         size_t count, i;
616
617         if (!lp_locking(fsp->conn->params) ||
618             !lp_posix_locking(fsp->conn->params))
619         {
620                 /*
621                  * No locking or POSIX to worry about or we want POSIX semantics
622                  * which will lose all locks on all fd's open on this dev/inode,
623                  * just close.
624                  */
625                 return close(fsp->fh->fd);
626         }
627
628         if (get_windows_lock_ref_count(fsp)) {
629
630                 /*
631                  * There are outstanding locks on this dev/inode pair on
632                  * other fds. Add our fd to the pending close tdb and set
633                  * fsp->fh->fd to -1.
634                  */
635
636                 add_fd_to_close_entry(fsp);
637                 return 0;
638         }
639
640         /*
641          * No outstanding locks. Get the pending close fd's
642          * from the tdb and close them all.
643          */
644
645         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
646
647         if (count) {
648                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
649                           (unsigned int)count));
650
651                 for(i = 0; i < count; i++) {
652                         if (close(fd_array[i]) == -1) {
653                                 saved_errno = errno;
654                         }
655                 }
656
657                 /*
658                  * Delete all fd's stored in the tdb
659                  * for this dev/inode pair.
660                  */
661
662                 delete_close_entries(fsp);
663         }
664
665         TALLOC_FREE(fd_array);
666
667         /* Don't need a lock ref count on this dev/ino anymore. */
668         delete_windows_lock_ref_count(fsp);
669
670         /*
671          * Finally close the fd associated with this fsp.
672          */
673
674         ret = close(fsp->fh->fd);
675
676         if (ret == 0 && saved_errno != 0) {
677                 errno = saved_errno;
678                 ret = -1;
679         }
680
681         return ret;
682 }
683
684 /****************************************************************************
685  Next - the functions that deal with the mapping CIFS Windows locks onto
686  the underlying system POSIX locks.
687 ****************************************************************************/
688
689 /*
690  * Structure used when splitting a lock range
691  * into a POSIX lock range. Doubly linked list.
692  */
693
694 struct lock_list {
695         struct lock_list *next;
696         struct lock_list *prev;
697         SMB_OFF_T start;
698         SMB_OFF_T size;
699 };
700
701 /****************************************************************************
702  Create a list of lock ranges that don't overlap a given range. Used in calculating
703  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
704  understand it :-).
705 ****************************************************************************/
706
707 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
708                                                 struct lock_list *lhead,
709                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
710                                                 files_struct *fsp,
711                                                 const struct lock_struct *plocks,
712                                                 int num_locks)
713 {
714         int i;
715
716         /*
717          * Check the current lock list on this dev/inode pair.
718          * Quit if the list is deleted.
719          */
720
721         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
722                 (double)lhead->start, (double)lhead->size ));
723
724         for (i=0; i<num_locks && lhead; i++) {
725                 const struct lock_struct *lock = &plocks[i];
726                 struct lock_list *l_curr;
727
728                 /* Ignore all but read/write locks. */
729                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
730                         continue;
731                 }
732
733                 /* Ignore locks not owned by this process. */
734                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
735                         continue;
736                 }
737
738                 /*
739                  * Walk the lock list, checking for overlaps. Note that
740                  * the lock list can expand within this loop if the current
741                  * range being examined needs to be split.
742                  */
743
744                 for (l_curr = lhead; l_curr;) {
745
746                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
747                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
748
749                         if ( (l_curr->start >= (lock->start + lock->size)) ||
750                                  (lock->start >= (l_curr->start + l_curr->size))) {
751
752                                 /* No overlap with existing lock - leave this range alone. */
753 /*********************************************
754                                              +---------+
755                                              | l_curr  |
756                                              +---------+
757                                 +-------+
758                                 | lock  |
759                                 +-------+
760 OR....
761              +---------+
762              |  l_curr |
763              +---------+
764 **********************************************/
765
766                                 DEBUG(10,(" no overlap case.\n" ));
767
768                                 l_curr = l_curr->next;
769
770                         } else if ( (l_curr->start >= lock->start) &&
771                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
772
773                                 /*
774                                  * This range is completely overlapped by this existing lock range
775                                  * and thus should have no effect. Delete it from the list.
776                                  */
777 /*********************************************
778                 +---------+
779                 |  l_curr |
780                 +---------+
781         +---------------------------+
782         |       lock                |
783         +---------------------------+
784 **********************************************/
785                                 /* Save the next pointer */
786                                 struct lock_list *ul_next = l_curr->next;
787
788                                 DEBUG(10,(" delete case.\n" ));
789
790                                 DLIST_REMOVE(lhead, l_curr);
791                                 if(lhead == NULL) {
792                                         break; /* No more list... */
793                                 }
794
795                                 l_curr = ul_next;
796                                 
797                         } else if ( (l_curr->start >= lock->start) &&
798                                                 (l_curr->start < lock->start + lock->size) &&
799                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
800
801                                 /*
802                                  * This range overlaps the existing lock range at the high end.
803                                  * Truncate by moving start to existing range end and reducing size.
804                                  */
805 /*********************************************
806                 +---------------+
807                 |  l_curr       |
808                 +---------------+
809         +---------------+
810         |    lock       |
811         +---------------+
812 BECOMES....
813                         +-------+
814                         | l_curr|
815                         +-------+
816 **********************************************/
817
818                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
819                                 l_curr->start = lock->start + lock->size;
820
821                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
822                                                                 (double)l_curr->start, (double)l_curr->size ));
823
824                                 l_curr = l_curr->next;
825
826                         } else if ( (l_curr->start < lock->start) &&
827                                                 (l_curr->start + l_curr->size > lock->start) &&
828                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
829
830                                 /*
831                                  * This range overlaps the existing lock range at the low end.
832                                  * Truncate by reducing size.
833                                  */
834 /*********************************************
835    +---------------+
836    |  l_curr       |
837    +---------------+
838            +---------------+
839            |    lock       |
840            +---------------+
841 BECOMES....
842    +-------+
843    | l_curr|
844    +-------+
845 **********************************************/
846
847                                 l_curr->size = lock->start - l_curr->start;
848
849                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
850                                                                 (double)l_curr->start, (double)l_curr->size ));
851
852                                 l_curr = l_curr->next;
853                 
854                         } else if ( (l_curr->start < lock->start) &&
855                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
856                                 /*
857                                  * Worst case scenario. Range completely overlaps an existing
858                                  * lock range. Split the request into two, push the new (upper) request
859                                  * into the dlink list, and continue with the entry after l_new (as we
860                                  * know that l_new will not overlap with this lock).
861                                  */
862 /*********************************************
863         +---------------------------+
864         |        l_curr             |
865         +---------------------------+
866                 +---------+
867                 | lock    |
868                 +---------+
869 BECOMES.....
870         +-------+         +---------+
871         | l_curr|         | l_new   |
872         +-------+         +---------+
873 **********************************************/
874                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
875
876                                 if(l_new == NULL) {
877                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
878                                         return NULL; /* The talloc_destroy takes care of cleanup. */
879                                 }
880
881                                 ZERO_STRUCTP(l_new);
882                                 l_new->start = lock->start + lock->size;
883                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
884
885                                 /* Truncate the l_curr. */
886                                 l_curr->size = lock->start - l_curr->start;
887
888                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
889 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
890                                                                 (double)l_new->start, (double)l_new->size ));
891
892                                 /*
893                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
894                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
895                                  */
896
897                                 l_new->prev = l_curr;
898                                 l_new->next = l_curr->next;
899                                 l_curr->next = l_new;
900
901                                 /* And move after the link we added. */
902                                 l_curr = l_new->next;
903
904                         } else {
905
906                                 /*
907                                  * This logic case should never happen. Ensure this is the
908                                  * case by forcing an abort.... Remove in production.
909                                  */
910                                 char *msg = NULL;
911
912                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
913 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
914                                         smb_panic(msg);
915                                 } else {
916                                         smb_panic("posix_lock_list");
917                                 }
918                         }
919                 } /* end for ( l_curr = lhead; l_curr;) */
920         } /* end for (i=0; i<num_locks && ul_head; i++) */
921
922         return lhead;
923 }
924
925 /****************************************************************************
926  POSIX function to acquire a lock. Returns True if the
927  lock could be granted, False if not.
928 ****************************************************************************/
929
930 bool set_posix_lock_windows_flavour(files_struct *fsp,
931                         uint64_t u_offset,
932                         uint64_t u_count,
933                         enum brl_type lock_type,
934                         const struct lock_context *lock_ctx,
935                         const struct lock_struct *plocks,
936                         int num_locks,
937                         int *errno_ret)
938 {
939         SMB_OFF_T offset;
940         SMB_OFF_T count;
941         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
942         bool ret = True;
943         size_t lock_count;
944         TALLOC_CTX *l_ctx = NULL;
945         struct lock_list *llist = NULL;
946         struct lock_list *ll = NULL;
947
948         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
949                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
950
951         /*
952          * If the requested lock won't fit in the POSIX range, we will
953          * pretend it was successful.
954          */
955
956         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
957                 increment_windows_lock_ref_count(fsp);
958                 return True;
959         }
960
961         /*
962          * Windows is very strange. It allows read locks to be overlayed
963          * (even over a write lock), but leaves the write lock in force until the first
964          * unlock. It also reference counts the locks. This means the following sequence :
965          *
966          * process1                                      process2
967          * ------------------------------------------------------------------------
968          * WRITE LOCK : start = 2, len = 10
969          *                                            READ LOCK: start =0, len = 10 - FAIL
970          * READ LOCK : start = 0, len = 14 
971          *                                            READ LOCK: start =0, len = 10 - FAIL
972          * UNLOCK : start = 2, len = 10
973          *                                            READ LOCK: start =0, len = 10 - OK
974          *
975          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
976          * would leave a single read lock over the 0-14 region.
977          */
978         
979         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
980                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
981                 return False;
982         }
983
984         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
985                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
986                 talloc_destroy(l_ctx);
987                 return False;
988         }
989
990         /*
991          * Create the initial list entry containing the
992          * lock we want to add.
993          */
994
995         ZERO_STRUCTP(ll);
996         ll->start = offset;
997         ll->size = count;
998
999         DLIST_ADD(llist, ll);
1000
1001         /*
1002          * The following call calculates if there are any
1003          * overlapping locks held by this process on
1004          * fd's open on the same file and splits this list
1005          * into a list of lock ranges that do not overlap with existing
1006          * POSIX locks.
1007          */
1008
1009         llist = posix_lock_list(l_ctx,
1010                                 llist,
1011                                 lock_ctx, /* Lock context llist belongs to. */
1012                                 fsp,
1013                                 plocks,
1014                                 num_locks);
1015
1016         /*
1017          * Add the POSIX locks on the list of ranges returned.
1018          * As the lock is supposed to be added atomically, we need to
1019          * back out all the locks if any one of these calls fail.
1020          */
1021
1022         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1023                 offset = ll->start;
1024                 count = ll->size;
1025
1026                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1027                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1028
1029                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1030                         *errno_ret = errno;
1031                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1032                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1033                         ret = False;
1034                         break;
1035                 }
1036         }
1037
1038         if (!ret) {
1039
1040                 /*
1041                  * Back out all the POSIX locks we have on fail.
1042                  */
1043
1044                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1045                         offset = ll->start;
1046                         count = ll->size;
1047
1048                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1049                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1050
1051                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1052                 }
1053         } else {
1054                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1055                 increment_windows_lock_ref_count(fsp);
1056         }
1057
1058         talloc_destroy(l_ctx);
1059         return ret;
1060 }
1061
1062 /****************************************************************************
1063  POSIX function to release a lock. Returns True if the
1064  lock could be released, False if not.
1065 ****************************************************************************/
1066
1067 bool release_posix_lock_windows_flavour(files_struct *fsp,
1068                                 uint64_t u_offset,
1069                                 uint64_t u_count,
1070                                 enum brl_type deleted_lock_type,
1071                                 const struct lock_context *lock_ctx,
1072                                 const struct lock_struct *plocks,
1073                                 int num_locks)
1074 {
1075         SMB_OFF_T offset;
1076         SMB_OFF_T count;
1077         bool ret = True;
1078         TALLOC_CTX *ul_ctx = NULL;
1079         struct lock_list *ulist = NULL;
1080         struct lock_list *ul = NULL;
1081
1082         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f\n",
1083                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1084
1085         /* Remember the number of Windows locks we have on this dev/ino pair. */
1086         decrement_windows_lock_ref_count(fsp);
1087
1088         /*
1089          * If the requested lock won't fit in the POSIX range, we will
1090          * pretend it was successful.
1091          */
1092
1093         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1094                 return True;
1095         }
1096
1097         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1098                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1099                 return False;
1100         }
1101
1102         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1103                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1104                 talloc_destroy(ul_ctx);
1105                 return False;
1106         }
1107
1108         /*
1109          * Create the initial list entry containing the
1110          * lock we want to remove.
1111          */
1112
1113         ZERO_STRUCTP(ul);
1114         ul->start = offset;
1115         ul->size = count;
1116
1117         DLIST_ADD(ulist, ul);
1118
1119         /*
1120          * The following call calculates if there are any
1121          * overlapping locks held by this process on
1122          * fd's open on the same file and creates a
1123          * list of unlock ranges that will allow
1124          * POSIX lock ranges to remain on the file whilst the
1125          * unlocks are performed.
1126          */
1127
1128         ulist = posix_lock_list(ul_ctx,
1129                                 ulist,
1130                                 lock_ctx, /* Lock context ulist belongs to. */
1131                                 fsp,
1132                                 plocks,
1133                                 num_locks);
1134
1135         /*
1136          * If there were any overlapped entries (list is > 1 or size or start have changed),
1137          * and the lock_type we just deleted from
1138          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1139          * the POSIX lock to a read lock. This allows any overlapping read locks
1140          * to be atomically maintained.
1141          */
1142
1143         if (deleted_lock_type == WRITE_LOCK &&
1144                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1145
1146                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1147                         (double)offset, (double)count ));
1148
1149                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1150                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1151                         talloc_destroy(ul_ctx);
1152                         return False;
1153                 }
1154         }
1155
1156         /*
1157          * Release the POSIX locks on the list of ranges returned.
1158          */
1159
1160         for(; ulist; ulist = ulist->next) {
1161                 offset = ulist->start;
1162                 count = ulist->size;
1163
1164                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1165                         (double)offset, (double)count ));
1166
1167                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1168                         ret = False;
1169                 }
1170         }
1171
1172         talloc_destroy(ul_ctx);
1173         return ret;
1174 }
1175
1176 /****************************************************************************
1177  Next - the functions that deal with mapping CIFS POSIX locks onto
1178  the underlying system POSIX locks.
1179 ****************************************************************************/
1180
1181 /****************************************************************************
1182  POSIX function to acquire a lock. Returns True if the
1183  lock could be granted, False if not.
1184  As POSIX locks don't stack or conflict (they just overwrite)
1185  we can map the requested lock directly onto a system one. We
1186  know it doesn't conflict with locks on other contexts as the
1187  upper layer would have refused it.
1188 ****************************************************************************/
1189
1190 bool set_posix_lock_posix_flavour(files_struct *fsp,
1191                         uint64_t u_offset,
1192                         uint64_t u_count,
1193                         enum brl_type lock_type,
1194                         int *errno_ret)
1195 {
1196         SMB_OFF_T offset;
1197         SMB_OFF_T count;
1198         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1199
1200         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
1201                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1202
1203         /*
1204          * If the requested lock won't fit in the POSIX range, we will
1205          * pretend it was successful.
1206          */
1207
1208         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1209                 return True;
1210         }
1211
1212         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1213                 *errno_ret = errno;
1214                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1215                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1216                 return False;
1217         }
1218         return True;
1219 }
1220
1221 /****************************************************************************
1222  POSIX function to release a lock. Returns True if the
1223  lock could be released, False if not.
1224  We are given a complete lock state from the upper layer which is what the lock
1225  state should be after the unlock has already been done, so what
1226  we do is punch out holes in the unlock range where locks owned by this process
1227  have a different lock context.
1228 ****************************************************************************/
1229
1230 bool release_posix_lock_posix_flavour(files_struct *fsp,
1231                                 uint64_t u_offset,
1232                                 uint64_t u_count,
1233                                 const struct lock_context *lock_ctx,
1234                                 const struct lock_struct *plocks,
1235                                 int num_locks)
1236 {
1237         bool ret = True;
1238         SMB_OFF_T offset;
1239         SMB_OFF_T count;
1240         TALLOC_CTX *ul_ctx = NULL;
1241         struct lock_list *ulist = NULL;
1242         struct lock_list *ul = NULL;
1243
1244         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f\n",
1245                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1246
1247         /*
1248          * If the requested lock won't fit in the POSIX range, we will
1249          * pretend it was successful.
1250          */
1251
1252         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1253                 return True;
1254         }
1255
1256         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1257                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1258                 return False;
1259         }
1260
1261         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1262                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1263                 talloc_destroy(ul_ctx);
1264                 return False;
1265         }
1266
1267         /*
1268          * Create the initial list entry containing the
1269          * lock we want to remove.
1270          */
1271
1272         ZERO_STRUCTP(ul);
1273         ul->start = offset;
1274         ul->size = count;
1275
1276         DLIST_ADD(ulist, ul);
1277
1278         /*
1279          * Walk the given array creating a linked list
1280          * of unlock requests.
1281          */
1282
1283         ulist = posix_lock_list(ul_ctx,
1284                                 ulist,
1285                                 lock_ctx, /* Lock context ulist belongs to. */
1286                                 fsp,
1287                                 plocks,
1288                                 num_locks);
1289
1290         /*
1291          * Release the POSIX locks on the list of ranges returned.
1292          */
1293
1294         for(; ulist; ulist = ulist->next) {
1295                 offset = ulist->start;
1296                 count = ulist->size;
1297
1298                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1299                         (double)offset, (double)count ));
1300
1301                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1302                         ret = False;
1303                 }
1304         }
1305
1306         talloc_destroy(ul_ctx);
1307         return ret;
1308 }