/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "super.h"
#include "util.h"
#include "bmap.h"
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"

struct gfs2_gl_hash_bucket {
        struct hlist_head hb_list;
};

struct gfs2_glock_iter {
        int hash;                       /* hash bucket index         */
        struct gfs2_sbd *sdp;           /* incore superblock         */
        struct gfs2_glock *gl;          /* current glock struct      */
        char string[512];               /* scratch space             */
};

typedef void (*glock_examiner) (struct gfs2_glock * gl);

static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);

static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue;
struct workqueue_struct *gfs2_delete_workqueue;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);

#define GFS2_GL_HASH_SHIFT      15
#define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)

static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];

/*
 * Despite what you might think, the numbers below are not arbitrary :-)
 * They are taken from the ipv4 routing hash code, which is well tested
 * and thus should be nearly optimal. Later on we might tweak the numbers
 * but for now this should be fine.
 *
 * The reason for putting the locks in a separate array from the list heads
 * is that we can have fewer locks than list heads and save memory. We use
 * the same hash function for both, but with a different hash mask.
 */
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
        defined(CONFIG_PROVE_LOCKING)

#ifdef CONFIG_LOCKDEP
# define GL_HASH_LOCK_SZ        256
#else
# if NR_CPUS >= 32
#  define GL_HASH_LOCK_SZ       4096
# elif NR_CPUS >= 16
#  define GL_HASH_LOCK_SZ       2048
# elif NR_CPUS >= 8
#  define GL_HASH_LOCK_SZ       1024
# elif NR_CPUS >= 4
#  define GL_HASH_LOCK_SZ       512
# else
#  define GL_HASH_LOCK_SZ       256
# endif
#endif

/* We never want more locks than chains */
#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
# undef GL_HASH_LOCK_SZ
# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
#endif

static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];

static inline rwlock_t *gl_lock_addr(unsigned int x)
{
        return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
}
#else /* not SMP, so no spinlocks required */
static inline rwlock_t *gl_lock_addr(unsigned int x)
{
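        /*
         * With no lock array configured, hand back NULL; in this build
         * read_lock()/write_lock() compile to no-ops that never
         * dereference the pointer, so callers need no conditionals.
         */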
        return NULL;
}
#endif

/**
 * gl_hash() - Turn glock number into hash bucket number
 * @sdp: The superblock
 * @name: The lock name
 *
 * Returns: The number of the corresponding hash bucket
 */

static unsigned int gl_hash(const struct gfs2_sbd *sdp,
                            const struct lm_lockname *name)
{
        unsigned int h;

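        /*
         * Chain three jhash rounds: lock number, then lock type, then
         * the superblock pointer, so equal lock numbers from different
         * mounts land in different buckets.
         */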
        h = jhash(&name->ln_number, sizeof(u64), 0);
        h = jhash(&name->ln_type, sizeof(unsigned int), h);
        h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
        h &= GFS2_GL_HASH_MASK;

        return h;
}

/**
 * glock_free() - Perform a few checks and then release struct gfs2_glock
 * @gl: The glock to release
 *
 * Also calls lock module to release its internal structure for this glock.
 *
 */

static void glock_free(struct gfs2_glock *gl)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct address_space *mapping = gfs2_glock2aspace(gl);
        struct kmem_cache *cachep = gfs2_glock_cachep;

        GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
        trace_gfs2_glock_put(gl);
        if (mapping)
                cachep = gfs2_glock_aspace_cachep;
        sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
}

/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 */

void gfs2_glock_hold(struct gfs2_glock *gl)
{
        GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
        atomic_inc(&gl->gl_ref);
}

/**
 * demote_ok - Check to see if it's ok to unlock a glock
 * @gl: the glock
 *
 * Returns: 1 if it's ok
 */

static int demote_ok(const struct gfs2_glock *gl)
{
        const struct gfs2_glock_operations *glops = gl->gl_ops;

        if (gl->gl_state == LM_ST_UNLOCKED)
                return 0;
        if (!list_empty(&gl->gl_holders))
                return 0;
        if (glops->go_demote_ok)
                return glops->go_demote_ok(gl);
        return 1;
}

/**
 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
 * @gl: the glock
 *
 */

static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
{
        int may_reclaim;
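        /*
         * A glock is reclaimable once it can be demoted and only
         * "internal" references remain: a bare ref count of 1, or up to
         * 2 for inode glocks (which presumably keep one extra ref for
         * the inode they back).
         */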
        may_reclaim = (demote_ok(gl) &&
                       (atomic_read(&gl->gl_ref) == 1 ||
                        (gl->gl_name.ln_type == LM_TYPE_INODE &&
                         atomic_read(&gl->gl_ref) <= 2)));
        spin_lock(&lru_lock);
        if (list_empty(&gl->gl_lru) && may_reclaim) {
                list_add_tail(&gl->gl_lru, &lru_list);
                atomic_inc(&lru_count);
        }
        spin_unlock(&lru_lock);
}

/**
 * gfs2_glock_put_nolock() - Decrement reference count on glock
 * @gl: The glock to put
 *
 * This function should only be used if the caller has its own reference
 * to the glock, in addition to the one it is dropping.
 */

void gfs2_glock_put_nolock(struct gfs2_glock *gl)
{
        if (atomic_dec_and_test(&gl->gl_ref))
                GLOCK_BUG_ON(gl, 1);
        gfs2_glock_schedule_for_reclaim(gl);
}

/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 */

int gfs2_glock_put(struct gfs2_glock *gl)
{
        int rv = 0;

        write_lock(gl_lock_addr(gl->gl_hash));
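        /*
         * atomic_dec_and_lock() takes lru_lock only when the count
         * actually drops to zero, keeping the common put path off the
         * LRU lock.
         */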
        if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
                hlist_del(&gl->gl_list);
                if (!list_empty(&gl->gl_lru)) {
                        list_del_init(&gl->gl_lru);
                        atomic_dec(&lru_count);
                }
                spin_unlock(&lru_lock);
                write_unlock(gl_lock_addr(gl->gl_hash));
                GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
                glock_free(gl);
                rv = 1;
                goto out;
        }
        spin_lock(&gl->gl_spin);
        gfs2_glock_schedule_for_reclaim(gl);
        spin_unlock(&gl->gl_spin);
        write_unlock(gl_lock_addr(gl->gl_hash));
out:
        return rv;
}

/**
 * search_bucket() - Find struct gfs2_glock by lock number
 * @hash: the hash bucket to search
 * @sdp: the filesystem
 * @name: The lock name
 *
 * Returns: NULL, or the struct gfs2_glock with the requested number
 */

static struct gfs2_glock *search_bucket(unsigned int hash,
                                        const struct gfs2_sbd *sdp,
                                        const struct lm_lockname *name)
{
        struct gfs2_glock *gl;
        struct hlist_node *h;

        hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
                if (!lm_name_equal(&gl->gl_name, name))
                        continue;
                if (gl->gl_sbd != sdp)
                        continue;

                atomic_inc(&gl->gl_ref);

                return gl;
        }

        return NULL;
}

/**
 * may_grant - check if it's ok to grant a new lock
 * @gl: The glock
 * @gh: The lock request which we wish to grant
 *
 * Returns: true if it's ok to grant the lock
 */

static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
{
        const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
        if ((gh->gh_state == LM_ST_EXCLUSIVE ||
             gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
                return 0;
        if (gl->gl_state == gh->gh_state)
                return 1;
        if (gh->gh_flags & GL_EXACT)
                return 0;
        if (gl->gl_state == LM_ST_EXCLUSIVE) {
                if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
                        return 1;
                if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
                        return 1;
        }
        if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
                return 1;
        return 0;
}

static void gfs2_holder_wake(struct gfs2_holder *gh)
{
        clear_bit(HIF_WAIT, &gh->gh_iflags);
        smp_mb__after_clear_bit();
        wake_up_bit(&gh->gh_iflags, HIF_WAIT);
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 *
 * Returns: 1 if there is a blocked holder at the head of the list, or 2
 *          if a type specific operation is underway.
 */

static int do_promote(struct gfs2_glock *gl)
__releases(&gl->gl_spin)
__acquires(&gl->gl_spin)
{
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        struct gfs2_holder *gh, *tmp;
        int ret;

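        /*
         * gl_spin is dropped around ->go_lock(), so the holders list
         * can change underneath us; rescan from the top after every
         * wakeup.
         */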
restart:
        list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
                if (test_bit(HIF_HOLDER, &gh->gh_iflags))
                        continue;
                if (may_grant(gl, gh)) {
                        if (gh->gh_list.prev == &gl->gl_holders &&
                            glops->go_lock) {
                                spin_unlock(&gl->gl_spin);
                                /* FIXME: eliminate this eventually */
                                ret = glops->go_lock(gh);
                                spin_lock(&gl->gl_spin);
                                if (ret) {
                                        if (ret == 1)
                                                return 2;
                                        gh->gh_error = ret;
                                        list_del_init(&gh->gh_list);
                                        trace_gfs2_glock_queue(gh, 0);
                                        gfs2_holder_wake(gh);
                                        goto restart;
                                }
                                set_bit(HIF_HOLDER, &gh->gh_iflags);
                                trace_gfs2_promote(gh, 1);
                                gfs2_holder_wake(gh);
                                goto restart;
                        }
                        set_bit(HIF_HOLDER, &gh->gh_iflags);
                        trace_gfs2_promote(gh, 0);
                        gfs2_holder_wake(gh);
                        continue;
                }
                if (gh->gh_list.prev == &gl->gl_holders)
                        return 1;
                break;
        }
        return 0;
}

/**
 * do_error - Something unexpected has happened during a lock request
 * @gl: the glock
 * @ret: the status from the lock module
 *
 */

static inline void do_error(struct gfs2_glock *gl, const int ret)
{
        struct gfs2_holder *gh, *tmp;

        list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
                if (test_bit(HIF_HOLDER, &gh->gh_iflags))
                        continue;
                if (ret & LM_OUT_ERROR)
                        gh->gh_error = -EIO;
                else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
                        gh->gh_error = GLR_TRYFAILED;
                else
                        continue;
                list_del_init(&gh->gh_list);
                trace_gfs2_glock_queue(gh, 0);
                gfs2_holder_wake(gh);
        }
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
        struct gfs2_holder *gh;

        list_for_each_entry(gh, &gl->gl_holders, gh_list) {
                if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
                        return gh;
        }
        return NULL;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 *
 */

static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
        int held1, held2;

        held1 = (gl->gl_state != LM_ST_UNLOCKED);
        held2 = (new_state != LM_ST_UNLOCKED);

        if (held1 != held2) {
                if (held2)
                        gfs2_glock_hold(gl);
                else
                        gfs2_glock_put_nolock(gl);
        }

        gl->gl_state = new_state;
        gl->gl_tchange = jiffies;
}

static void gfs2_demote_wake(struct gfs2_glock *gl)
{
        gl->gl_demote_state = LM_ST_EXCLUSIVE;
        clear_bit(GLF_DEMOTE, &gl->gl_flags);
        smp_mb__after_clear_bit();
        wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 */

static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        struct gfs2_holder *gh;
        unsigned state = ret & LM_OUT_ST_MASK;
        int rv;

        spin_lock(&gl->gl_spin);
        trace_gfs2_glock_state_change(gl, state);
        state_change(gl, state);
        gh = find_first_waiter(gl);

        /* Demote to UN request arrived during demote to SH or DF */
        if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
            state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
                gl->gl_target = LM_ST_UNLOCKED;

        /* Check for state != intended state */
        if (unlikely(state != gl->gl_target)) {
                if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
                        /* move to back of queue and try next entry */
                        if (ret & LM_OUT_CANCELED) {
                                if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
                                        list_move_tail(&gh->gh_list, &gl->gl_holders);
                                gh = find_first_waiter(gl);
                                gl->gl_target = gh->gh_state;
                                goto retry;
                        }
                        /* Some error or failed "try lock" - report it */
                        if ((ret & LM_OUT_ERROR) ||
                            (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
                                gl->gl_target = gl->gl_state;
                                do_error(gl, ret);
                                goto out;
                        }
                }
                switch(state) {
                /* Unlocked due to conversion deadlock, try again */
                case LM_ST_UNLOCKED:
retry:
                        do_xmote(gl, gh, gl->gl_target);
                        break;
                /* Conversion fails, unlock and try again */
                case LM_ST_SHARED:
                case LM_ST_DEFERRED:
                        do_xmote(gl, gh, LM_ST_UNLOCKED);
                        break;
                default: /* Everything else */
                        printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state);
                        GLOCK_BUG_ON(gl, 1);
                }
                spin_unlock(&gl->gl_spin);
                return;
        }

        /* Fast path - we got what we asked for */
        if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
                gfs2_demote_wake(gl);
        if (state != LM_ST_UNLOCKED) {
                if (glops->go_xmote_bh) {
                        spin_unlock(&gl->gl_spin);
                        rv = glops->go_xmote_bh(gl, gh);
                        spin_lock(&gl->gl_spin);
                        if (rv) {
                                do_error(gl, rv);
                                goto out;
                        }
                }
                rv = do_promote(gl);
                if (rv == 2)
                        goto out_locked;
        }
out:
        clear_bit(GLF_LOCK, &gl->gl_flags);
out_locked:
        spin_unlock(&gl->gl_spin);
}

static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
                                 unsigned int req_state,
                                 unsigned int flags)
{
        int ret = LM_OUT_ERROR;

        if (!sdp->sd_lockstruct.ls_ops->lm_lock)
                return req_state == LM_ST_UNLOCKED ? 0 : req_state;

        if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
                                                         req_state, flags);
        return ret;
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
__releases(&gl->gl_spin)
__acquires(&gl->gl_spin)
{
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        unsigned int lck_flags = gh ? gh->gh_flags : 0;
        int ret;

        lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
                      LM_FLAG_PRIORITY);
        BUG_ON(gl->gl_state == target);
        BUG_ON(gl->gl_state == gl->gl_target);
        if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
            glops->go_inval) {
                set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
                do_error(gl, 0); /* Fail queued try locks */
        }
        spin_unlock(&gl->gl_spin);
        if (glops->go_xmote_th)
                glops->go_xmote_th(gl);
        if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
                glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
        clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);

        gfs2_glock_hold(gl);
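        /*
         * For a conversion from SH/DF, ask for a one-callback try lock
         * (LM_FLAG_TRY_1CB), presumably to avoid conversion deadlocks:
         * a canceled request comes back as LM_OUT_CANCELED and is
         * retried by finish_xmote() rather than blocking in the DLM.
         */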
        if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
            gl->gl_state == LM_ST_DEFERRED) &&
            !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
                lck_flags |= LM_FLAG_TRY_1CB;
        ret = gfs2_lm_lock(sdp, gl, target, lck_flags);

        if (!(ret & LM_OUT_ASYNC)) {
                finish_xmote(gl, ret);
                if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                        gfs2_glock_put(gl);
        } else {
                GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
        }
        spin_lock(&gl->gl_spin);
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
        struct gfs2_holder *gh;

        if (!list_empty(&gl->gl_holders)) {
                gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
                if (test_bit(HIF_HOLDER, &gh->gh_iflags))
                        return gh;
        }
        return NULL;
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 */

static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_spin)
__acquires(&gl->gl_spin)
{
        struct gfs2_holder *gh = NULL;
        int ret;

        if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
                return;

        GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

        if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
            gl->gl_demote_state != gl->gl_state) {
                if (find_first_holder(gl))
                        goto out_unlock;
                if (nonblock)
                        goto out_sched;
                set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
                GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
                gl->gl_target = gl->gl_demote_state;
        } else {
                if (test_bit(GLF_DEMOTE, &gl->gl_flags))
                        gfs2_demote_wake(gl);
                ret = do_promote(gl);
                if (ret == 0)
                        goto out_unlock;
                if (ret == 2)
                        goto out;
                gh = find_first_waiter(gl);
                gl->gl_target = gh->gh_state;
                if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
                        do_error(gl, 0); /* Fail queued try locks */
        }
        do_xmote(gl, gh, gl->gl_target);
out:
        return;

out_sched:
        clear_bit(GLF_LOCK, &gl->gl_flags);
        smp_mb__after_clear_bit();
        gfs2_glock_hold(gl);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                gfs2_glock_put_nolock(gl);
        return;

out_unlock:
        clear_bit(GLF_LOCK, &gl->gl_flags);
        smp_mb__after_clear_bit();
        return;
}

static void delete_work_func(struct work_struct *work)
{
        struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = NULL;
        struct inode *inode;
        u64 no_addr = 0;

        spin_lock(&gl->gl_spin);
        ip = (struct gfs2_inode *)gl->gl_object;
        if (ip)
                no_addr = ip->i_no_addr;
        spin_unlock(&gl->gl_spin);
        if (ip) {
                inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
                if (inode) {
                        d_prune_aliases(inode);
                        iput(inode);
                }
        }
        gfs2_glock_put(gl);
}

static void glock_work_func(struct work_struct *work)
{
        unsigned long delay = 0;
        struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
        struct gfs2_holder *gh;
        int drop_ref = 0;

        if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
                spin_lock(&gl->gl_spin);
                gh = find_first_waiter(gl);
                if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
                    test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
                        set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
                spin_unlock(&gl->gl_spin);
        }

        if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
                finish_xmote(gl, gl->gl_reply);
                drop_ref = 1;
        }
        spin_lock(&gl->gl_spin);
        if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
            gl->gl_state != LM_ST_UNLOCKED &&
            gl->gl_demote_state != LM_ST_EXCLUSIVE) {
                unsigned long holdtime, now = jiffies;
                holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
                if (time_before(now, holdtime))
                        delay = holdtime - now;
                set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
        }
        run_queue(gl, 0);
        spin_unlock(&gl->gl_spin);
        if (!delay ||
            queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
                gfs2_glock_put(gl);
        if (drop_ref)
                gfs2_glock_put(gl);
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
                   const struct gfs2_glock_operations *glops, int create,
                   struct gfs2_glock **glp)
{
        struct super_block *s = sdp->sd_vfs;
        struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type };
        struct gfs2_glock *gl, *tmp;
        unsigned int hash = gl_hash(sdp, &name);
        struct address_space *mapping;

        read_lock(gl_lock_addr(hash));
        gl = search_bucket(hash, sdp, &name);
        read_unlock(gl_lock_addr(hash));

        *glp = gl;
        if (gl)
                return 0;
        if (!create)
                return -ENOENT;

        if (glops->go_flags & GLOF_ASPACE)
                gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL);
        else
                gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
        if (!gl)
                return -ENOMEM;

        atomic_inc(&sdp->sd_glock_disposal);
        gl->gl_flags = 0;
        gl->gl_name = name;
        atomic_set(&gl->gl_ref, 1);
        gl->gl_state = LM_ST_UNLOCKED;
        gl->gl_target = LM_ST_UNLOCKED;
        gl->gl_demote_state = LM_ST_EXCLUSIVE;
        gl->gl_hash = hash;
        gl->gl_ops = glops;
        snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number);
        memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
        gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
        gl->gl_tchange = jiffies;
        gl->gl_object = NULL;
        gl->gl_sbd = sdp;
        INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
        INIT_WORK(&gl->gl_delete, delete_work_func);

        mapping = gfs2_glock2aspace(gl);
        if (mapping) {
                mapping->a_ops = &gfs2_meta_aops;
                mapping->host = s->s_bdev->bd_inode;
                mapping->flags = 0;
                mapping_set_gfp_mask(mapping, GFP_NOFS);
                mapping->assoc_mapping = NULL;
                mapping->backing_dev_info = s->s_bdi;
                mapping->writeback_index = 0;
        }

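        /*
         * The hash lock was dropped after the first search, so another
         * thread may have inserted this glock already; search again
         * under the write lock and free our copy if we lost the race.
         */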
        write_lock(gl_lock_addr(hash));
        tmp = search_bucket(hash, sdp, &name);
        if (tmp) {
                write_unlock(gl_lock_addr(hash));
                glock_free(gl);
                gl = tmp;
        } else {
                hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
                write_unlock(gl_lock_addr(hash));
        }

        *glp = gl;

        return 0;
}

/**
 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 */

void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
                      struct gfs2_holder *gh)
{
        INIT_LIST_HEAD(&gh->gh_list);
        gh->gh_gl = gl;
        gh->gh_ip = (unsigned long)__builtin_return_address(0);
        gh->gh_owner_pid = get_pid(task_pid(current));
        gh->gh_state = state;
        gh->gh_flags = flags;
        gh->gh_error = 0;
        gh->gh_iflags = 0;
        gfs2_glock_hold(gl);
}

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */

void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh)
{
        gh->gh_state = state;
        gh->gh_flags = flags;
        gh->gh_iflags = 0;
        gh->gh_ip = (unsigned long)__builtin_return_address(0);
        if (gh->gh_owner_pid)
                put_pid(gh->gh_owner_pid);
        gh->gh_owner_pid = get_pid(task_pid(current));
}

/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
        put_pid(gh->gh_owner_pid);
        gfs2_glock_put(gh->gh_gl);
        gh->gh_gl = NULL;
        gh->gh_ip = 0;
}

/**
 * gfs2_glock_holder_wait
 * @word: unused
 *
 * This function and gfs2_glock_demote_wait both show up in the WCHAN
 * field. Thus I've separated these otherwise identical functions in
 * order to be more informative to the user.
 */

static int gfs2_glock_holder_wait(void *word)
{
        schedule();
        return 0;
}

static int gfs2_glock_demote_wait(void *word)
{
        schedule();
        return 0;
}

static void wait_on_holder(struct gfs2_holder *gh)
{
        might_sleep();
        wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
}

static void wait_on_demote(struct gfs2_glock *gl)
{
        might_sleep();
        wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
}

/**
 * handle_callback - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: if nonzero, mark the demote as pending rather than immediate
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */

static void handle_callback(struct gfs2_glock *gl, unsigned int state,
                            unsigned long delay)
{
        int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;

        set_bit(bit, &gl->gl_flags);
        if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
                gl->gl_demote_state = state;
                gl->gl_demote_time = jiffies;
        } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
                        gl->gl_demote_state != state) {
                gl->gl_demote_state = LM_ST_UNLOCKED;
        }
        if (gl->gl_ops->go_callback)
                gl->gl_ops->go_callback(gl);
        trace_gfs2_demote_rq(gl);
}

/**
 * gfs2_glock_wait - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Returns: 0 on success
 */

int gfs2_glock_wait(struct gfs2_holder *gh)
{
        wait_on_holder(gh);
        return gh->gh_error;
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        if (seq) {
                struct gfs2_glock_iter *gi = seq->private;
                /* Bound the formatting, and never pass the formatted
                   result back in as a format string */
                vsnprintf(gi->string, sizeof(gi->string), fmt, args);
                seq_printf(seq, "%s", gi->string);
        } else {
                printk(KERN_ERR " ");
                vprintk(fmt, args);
        }
        va_end(args);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
 *
 */

static inline void add_to_queue(struct gfs2_holder *gh)
__releases(&gl->gl_spin)
__acquires(&gl->gl_spin)
{
        struct gfs2_glock *gl = gh->gh_gl;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct list_head *insert_pt = NULL;
        struct gfs2_holder *gh2;
        int try_lock = 0;

        BUG_ON(gh->gh_owner_pid == NULL);
        if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
                BUG();

        if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
                if (test_bit(GLF_LOCK, &gl->gl_flags))
                        try_lock = 1;
                if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
                        goto fail;
        }

        list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
                if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
                    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
                        goto trap_recursive;
                if (try_lock &&
                    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
                    !may_grant(gl, gh)) {
fail:
                        gh->gh_error = GLR_TRYFAILED;
                        gfs2_holder_wake(gh);
                        return;
                }
                if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
                        continue;
                if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
                        insert_pt = &gh2->gh_list;
        }
        if (likely(insert_pt == NULL)) {
                list_add_tail(&gh->gh_list, &gl->gl_holders);
                if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
                        goto do_cancel;
                return;
        }
        trace_gfs2_glock_queue(gh, 1);
        list_add_tail(&gh->gh_list, insert_pt);
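        /*
         * Fall through: a priority holder was just queued ahead of
         * others, so cancel any request already in flight (unless the
         * head of the queue is itself a priority holder), presumably
         * to get the priority holder serviced sooner.
         */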
do_cancel:
        gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
        if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
                spin_unlock(&gl->gl_spin);
                if (sdp->sd_lockstruct.ls_ops->lm_cancel)
                        sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
                spin_lock(&gl->gl_spin);
        }
        return;

trap_recursive:
        print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip);
        printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid));
        printk(KERN_ERR "lock type: %d req lock state : %d\n",
               gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
        print_symbol(KERN_ERR "new: %s\n", gh->gh_ip);
        printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid));
        printk(KERN_ERR "lock type: %d req lock state : %d\n",
               gh->gh_gl->gl_name.ln_type, gh->gh_state);
        __dump_glock(NULL, gl);
        BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */

int gfs2_glock_nq(struct gfs2_holder *gh)
{
        struct gfs2_glock *gl = gh->gh_gl;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        int error = 0;

        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                return -EIO;

        spin_lock(&gl->gl_spin);
        add_to_queue(gh);
        run_queue(gl, 1);
        spin_unlock(&gl->gl_spin);

        if (!(gh->gh_flags & GL_ASYNC))
                error = gfs2_glock_wait(gh);

        return error;
}

/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */

int gfs2_glock_poll(struct gfs2_holder *gh)
{
        return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */

void gfs2_glock_dq(struct gfs2_holder *gh)
{
        struct gfs2_glock *gl = gh->gh_gl;
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        unsigned delay = 0;
        int fast_path = 0;

        spin_lock(&gl->gl_spin);
        if (gh->gh_flags & GL_NOCACHE)
                handle_callback(gl, LM_ST_UNLOCKED, 0);

        list_del_init(&gh->gh_list);
        if (find_first_holder(gl) == NULL) {
                if (glops->go_unlock) {
                        GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
                        spin_unlock(&gl->gl_spin);
                        glops->go_unlock(gh);
                        spin_lock(&gl->gl_spin);
                        clear_bit(GLF_LOCK, &gl->gl_flags);
                }
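                /*
                 * With no holders left and no demote pending there is
                 * nothing for the workqueue to do, so take the fast
                 * path and skip scheduling it.
                 */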
                if (list_empty(&gl->gl_holders) &&
                    !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
                    !test_bit(GLF_DEMOTE, &gl->gl_flags))
                        fast_path = 1;
        }
        trace_gfs2_glock_queue(gh, 0);
        spin_unlock(&gl->gl_spin);
        if (likely(fast_path))
                return;

        gfs2_glock_hold(gl);
        if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
            !test_bit(GLF_DEMOTE, &gl->gl_flags))
                delay = gl->gl_ops->go_min_hold_time;
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
                gfs2_glock_put(gl);
}

void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
        struct gfs2_glock *gl = gh->gh_gl;
        gfs2_glock_dq(gh);
        wait_on_demote(gl);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
 * @gh: the holder structure
 *
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
        gfs2_glock_dq(gh);
        gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */

int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
                      const struct gfs2_glock_operations *glops,
                      unsigned int state, int flags, struct gfs2_holder *gh)
{
        struct gfs2_glock *gl;
        int error;

        error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
        if (!error) {
                error = gfs2_glock_nq_init(gl, state, flags, gh);
                gfs2_glock_put(gl);
        }

        return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 */

static int glock_compare(const void *arg_a, const void *arg_b)
{
        const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
        const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
        const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
        const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

        if (a->ln_number > b->ln_number)
                return 1;
        if (a->ln_number < b->ln_number)
                return -1;
        BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
        return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
                     struct gfs2_holder **p)
{
        unsigned int x;
        int error = 0;

        for (x = 0; x < num_gh; x++)
                p[x] = &ghs[x];

        sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

        for (x = 0; x < num_gh; x++) {
                p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);

                error = gfs2_glock_nq(p[x]);
                if (error) {
                        while (x--)
                                gfs2_glock_dq(p[x]);
                        break;
                }
        }

        return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
        struct gfs2_holder *tmp[4];
        struct gfs2_holder **pph = tmp;
        int error = 0;

        switch(num_gh) {
        case 0:
                return 0;
        case 1:
                ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
                return gfs2_glock_nq(ghs);
        default:
                if (num_gh <= 4)
                        break;
                pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
                if (!pph)
                        return -ENOMEM;
        }

        error = nq_m_sync(num_gh, ghs, pph);

        if (pph != tmp)
                kfree(pph);

        return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */

void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
        unsigned int x;

        for (x = 0; x < num_gh; x++)
                gfs2_glock_dq(&ghs[x]);
}

/**
 * gfs2_glock_dq_uninit_m - release multiple glocks and uninitialize their holders
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */

void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
        unsigned int x;

        for (x = 0; x < num_gh; x++)
                gfs2_glock_dq_uninit(&ghs[x]);
}

void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
        unsigned long delay = 0;
        unsigned long holdtime;
        unsigned long now = jiffies;

        gfs2_glock_hold(gl);
        holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
        if (time_before(now, holdtime))
                delay = holdtime - now;
        if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
                delay = gl->gl_ops->go_min_hold_time;

        spin_lock(&gl->gl_spin);
        handle_callback(gl, state, delay);
        spin_unlock(&gl->gl_spin);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
                gfs2_glock_put(gl);
}

/**
 * gfs2_glock_complete - Callback used by locking
 * @gl: Pointer to the glock
 * @ret: The return value from the dlm
 *
 */

void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
        struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
        gl->gl_reply = ret;
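        /*
         * While recovery is blocking locks (DFL_BLOCK_LOCKS), park
         * this reply as "frozen" unless a waiter asked for
         * LM_FLAG_NOEXP; thaw_glock() requeues frozen replies later.
         */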
        if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
                struct gfs2_holder *gh;
                spin_lock(&gl->gl_spin);
                gh = find_first_waiter(gl);
                if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
                     (gl->gl_target != LM_ST_UNLOCKED)) ||
                    ((ret & ~LM_OUT_ST_MASK) != 0))
                        set_bit(GLF_FROZEN, &gl->gl_flags);
                spin_unlock(&gl->gl_spin);
                if (test_bit(GLF_FROZEN, &gl->gl_flags))
                        return;
        }
        set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
        gfs2_glock_hold(gl);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                gfs2_glock_put(gl);
}

static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
        struct gfs2_glock *gl;
        int may_demote;
        int nr_skipped = 0;
        LIST_HEAD(skipped);

        if (nr == 0)
                goto out;

        if (!(gfp_mask & __GFP_FS))
                return -1;

        spin_lock(&lru_lock);
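        /*
         * Walk the LRU: glocks that are busy (GLF_LOCK already set) are
         * parked on a local "skipped" list and spliced back afterwards,
         * preserving their LRU order.
         */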
        while(nr && !list_empty(&lru_list)) {
                gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
                list_del_init(&gl->gl_lru);
                atomic_dec(&lru_count);

                /* Test for being demotable */
                if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
                        gfs2_glock_hold(gl);
                        spin_unlock(&lru_lock);
                        spin_lock(&gl->gl_spin);
                        may_demote = demote_ok(gl);
                        if (may_demote) {
                                handle_callback(gl, LM_ST_UNLOCKED, 0);
                                nr--;
                        }
                        clear_bit(GLF_LOCK, &gl->gl_flags);
                        smp_mb__after_clear_bit();
                        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                                gfs2_glock_put_nolock(gl);
                        spin_unlock(&gl->gl_spin);
                        spin_lock(&lru_lock);
                        continue;
                }
                nr_skipped++;
                list_add(&gl->gl_lru, &skipped);
        }
        list_splice(&skipped, &lru_list);
        atomic_add(nr_skipped, &lru_count);
        spin_unlock(&lru_lock);
out:
        return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker glock_shrinker = {
        .shrink = gfs2_shrink_glock_memory,
        .seeks = DEFAULT_SEEKS,
};

/**
 * examine_bucket - Call a function for each glock in a hash bucket
 * @examiner: the function
 * @sdp: the filesystem
 * @hash: the bucket's hash index
 *
 * Returns: 1 if the bucket has entries
 */

static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
                          unsigned int hash)
{
        struct gfs2_glock *gl, *prev = NULL;
        int has_entries = 0;
        struct hlist_head *head = &gl_hash_table[hash].hb_list;

        read_lock(gl_lock_addr(hash));
        /* Can't use hlist_for_each_entry - don't want prefetch here */
        if (hlist_empty(head))
                goto out;
        gl = list_entry(head->first, struct gfs2_glock, gl_list);
        while(1) {
                if (!sdp || gl->gl_sbd == sdp) {
                        gfs2_glock_hold(gl);
                        read_unlock(gl_lock_addr(hash));
                        if (prev)
                                gfs2_glock_put(prev);
                        prev = gl;
                        examiner(gl);
                        has_entries = 1;
                        read_lock(gl_lock_addr(hash));
                }
                if (gl->gl_list.next == NULL)
                        break;
                gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
        }
out:
        read_unlock(gl_lock_addr(hash));
        if (prev)
                gfs2_glock_put(prev);
        cond_resched();
        return has_entries;
}

/**
 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
 * @gl: The glock to thaw
 *
 * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
 * so this has to result in the ref count being dropped by one.
 */

static void thaw_glock(struct gfs2_glock *gl)
{
        if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
                return;
        set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
        gfs2_glock_hold(gl);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                gfs2_glock_put(gl);
}

/**
 * clear_glock - look at a glock and see if we can free it from glock cache
 * @gl: the glock to look at
 *
 */

static void clear_glock(struct gfs2_glock *gl)
{
        spin_lock(&lru_lock);
        if (!list_empty(&gl->gl_lru)) {
                list_del_init(&gl->gl_lru);
                atomic_dec(&lru_count);
        }
        spin_unlock(&lru_lock);

        spin_lock(&gl->gl_spin);
        if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED)
                handle_callback(gl, LM_ST_UNLOCKED, 0);
        spin_unlock(&gl->gl_spin);
        gfs2_glock_hold(gl);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                gfs2_glock_put(gl);
}

/**
 * gfs2_glock_thaw - Thaw any frozen glocks
 * @sdp: The super block
 *
 */

void gfs2_glock_thaw(struct gfs2_sbd *sdp)
{
        unsigned x;

        for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
                examine_bucket(thaw_glock, sdp, x);
}

/**
 * gfs2_gl_hash_clear - Empty out the glock hash table
 * @sdp: the filesystem
 *
 * Called when unmounting the filesystem; waits until every glock has
 * been disposed of.
 */

void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
        unsigned int x;

        for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
                examine_bucket(clear_glock, sdp, x);
        flush_workqueue(glock_workqueue);
        wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
        gfs2_dump_lockstate(sdp);
}

void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
{
        struct gfs2_glock *gl = ip->i_gl;
        int ret;

        ret = gfs2_truncatei_resume(ip);
        gfs2_assert_withdraw(gl->gl_sbd, ret == 0);

        spin_lock(&gl->gl_spin);
        clear_bit(GLF_LOCK, &gl->gl_flags);
        run_queue(gl, 1);
        spin_unlock(&gl->gl_spin);
}

static const char *state2str(unsigned state)
{
        switch(state) {
        case LM_ST_UNLOCKED:
                return "UN";
        case LM_ST_SHARED:
                return "SH";
        case LM_ST_DEFERRED:
                return "DF";
        case LM_ST_EXCLUSIVE:
                return "EX";
        }
        return "??";
}

static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
{
        char *p = buf;
        if (flags & LM_FLAG_TRY)
                *p++ = 't';
        if (flags & LM_FLAG_TRY_1CB)
                *p++ = 'T';
        if (flags & LM_FLAG_NOEXP)
                *p++ = 'e';
        if (flags & LM_FLAG_ANY)
                *p++ = 'A';
        if (flags & LM_FLAG_PRIORITY)
                *p++ = 'p';
        if (flags & GL_ASYNC)
                *p++ = 'a';
        if (flags & GL_EXACT)
                *p++ = 'E';
        if (flags & GL_NOCACHE)
                *p++ = 'c';
        if (test_bit(HIF_HOLDER, &iflags))
                *p++ = 'H';
        if (test_bit(HIF_WAIT, &iflags))
                *p++ = 'W';
        if (test_bit(HIF_FIRST, &iflags))
                *p++ = 'F';
        *p = 0;
        return buf;
}

1591 /**
1592  * dump_holder - print information about a glock holder
1593  * @seq: the seq_file struct
1594  * @gh: the glock holder
1595  *
1596  * Returns: 0 on success, -ENOBUFS when we run out of space
1597  */
1598
1599 static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1600 {
1601         struct task_struct *gh_owner = NULL;
1602         char buffer[KSYM_SYMBOL_LEN];
1603         char flags_buf[32];
1604
1605         sprint_symbol(buffer, gh->gh_ip);
1606         if (gh->gh_owner_pid)
1607                 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1608         gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
1609                   state2str(gh->gh_state),
1610                   hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1611                   gh->gh_error, 
1612                   gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1613                   gh_owner ? gh_owner->comm : "(ended)", buffer);
1614         return 0;
1615 }

static const char *gflags2str(char *buf, const unsigned long *gflags)
{
	char *p = buf;
	if (test_bit(GLF_LOCK, gflags))
		*p++ = 'l';
	if (test_bit(GLF_DEMOTE, gflags))
		*p++ = 'D';
	if (test_bit(GLF_PENDING_DEMOTE, gflags))
		*p++ = 'd';
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
		*p++ = 'p';
	if (test_bit(GLF_DIRTY, gflags))
		*p++ = 'y';
	if (test_bit(GLF_LFLUSH, gflags))
		*p++ = 'f';
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
		*p++ = 'i';
	if (test_bit(GLF_REPLY_PENDING, gflags))
		*p++ = 'r';
	if (test_bit(GLF_INITIAL, gflags))
		*p++ = 'I';
	if (test_bit(GLF_FROZEN, gflags))
		*p++ = 'F';
	*p = 0;
	return buf;
}

/**
 * __dump_glock - print information about a glock
 * @seq: The seq_file struct
 * @gl: the glock
 *
 * The file format is as follows:
 * One line per object, capital letters are used to indicate objects
 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
 * other objects are indented by a single space and follow the glock to
 * which they are related. Fields are indicated by lower case letters
 * followed by a colon and the field value, except for strings which are in
 * [] so that it's possible to see if they are composed of spaces, for
 * example. The fields are: n = number (id of the object), f = flags,
 * t = type, s = state, r = refcount, e = error, p = pid.
 *
 * Returns: 0 on success, -ENOBUFS when we run out of space
 */
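
/*
 * For illustration only (the values below are hypothetical, but the
 * layout follows the gfs2_print_dbg() format strings used here): a
 * shared inode glock with a single granted holder might dump as:
 *
 *   G:  s:SH n:2/27383 f:y t:SH d:EX/0 a:0 r:4
 *    H: s:SH f:H e:0 p:4466 [postmark] gfs2_inode_lookup+0x14e/0x260
 */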

static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned long long dtime;
	const struct gfs2_holder *gh;
	char gflags_buf[32];
	int error = 0;

	dtime = jiffies - gl->gl_demote_time;
	dtime *= 1000000/HZ; /* demote time in uSec */
	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
		dtime = 0;
	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
		  state2str(gl->gl_state),
		  gl->gl_name.ln_type,
		  (unsigned long long)gl->gl_name.ln_number,
		  gflags2str(gflags_buf, &gl->gl_flags),
		  state2str(gl->gl_target),
		  state2str(gl->gl_demote_state), dtime,
		  atomic_read(&gl->gl_ail_count),
		  atomic_read(&gl->gl_ref));

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		error = dump_holder(seq, gh);
		if (error)
			goto out;
	}
	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
		error = glops->go_dump(seq, gl);
out:
	return error;
}

static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
{
	int ret;
	spin_lock(&gl->gl_spin);
	ret = __dump_glock(seq, gl);
	spin_unlock(&gl->gl_spin);
	return ret;
}

/**
 * gfs2_dump_lockstate - print out the current lockstate
 * @sdp: the filesystem
 *
 * Dumps the lockstate to the console.
 */

static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
{
	struct gfs2_glock *gl;
	struct hlist_node *h;
	unsigned int x;
	int error = 0;

	for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
		read_lock(gl_lock_addr(x));

		hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
			if (gl->gl_sbd != sdp)
				continue;

			error = dump_glock(NULL, gl);
			if (error)
				break;
		}

		read_unlock(gl_lock_addr(x));

		if (error)
			break;
	}

	return error;
}


int __init gfs2_glock_init(void)
{
	unsigned i;
	for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
	}
#ifdef GL_HASH_LOCK_SZ
	for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
		rwlock_init(&gl_hash_locks[i]);
	}
#endif

	/* create_workqueue() returns NULL on failure, not an ERR_PTR */
	glock_workqueue = create_workqueue("glock_workqueue");
	if (!glock_workqueue)
		return -ENOMEM;
	gfs2_delete_workqueue = create_workqueue("delete_workqueue");
	if (!gfs2_delete_workqueue) {
		destroy_workqueue(glock_workqueue);
		return -ENOMEM;
	}

	register_shrinker(&glock_shrinker);

	return 0;
}

void gfs2_glock_exit(void)
{
	unregister_shrinker(&glock_shrinker);
	destroy_workqueue(glock_workqueue);
	destroy_workqueue(gfs2_delete_workqueue);
}

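/*
 * Move the iterator on to the next glock in the hash table, taking a
 * reference on the new glock and dropping the reference on the old one.
 * Glocks belonging to a different superblock are skipped via the
 * restart path. Returns 1 once the end of the hash table is reached,
 * 0 otherwise.
 */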
static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
	struct gfs2_glock *gl;

restart:
	read_lock(gl_lock_addr(gi->hash));
	gl = gi->gl;
	if (gl) {
		gi->gl = hlist_entry(gl->gl_list.next,
				     struct gfs2_glock, gl_list);
	} else {
		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
				     struct gfs2_glock, gl_list);
	}
	if (gi->gl)
		gfs2_glock_hold(gi->gl);
	read_unlock(gl_lock_addr(gi->hash));
	if (gl)
		gfs2_glock_put(gl);
	while (gi->gl == NULL) {
		gi->hash++;
		if (gi->hash >= GFS2_GL_HASH_SIZE)
			return 1;
		read_lock(gl_lock_addr(gi->hash));
		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
				     struct gfs2_glock, gl_list);
		if (gi->gl)
			gfs2_glock_hold(gi->gl);
		read_unlock(gl_lock_addr(gi->hash));
	}

	if (gi->sdp != gi->gl->gl_sbd)
		goto restart;

	return 0;
}

static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
{
	if (gi->gl)
		gfs2_glock_put(gi->gl);
	gi->gl = NULL;
}

static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct gfs2_glock_iter *gi = seq->private;
	loff_t n = *pos;

	gi->hash = 0;

	do {
		if (gfs2_glock_iter_next(gi)) {
			gfs2_glock_iter_free(gi);
			return NULL;
		}
	} while (n--);

	return gi->gl;
}

static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
				 loff_t *pos)
{
	struct gfs2_glock_iter *gi = seq->private;

	(*pos)++;

	if (gfs2_glock_iter_next(gi)) {
		gfs2_glock_iter_free(gi);
		return NULL;
	}

	return gi->gl;
}

static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glock_iter *gi = seq->private;
	gfs2_glock_iter_free(gi);
}

static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
	return dump_glock(seq, iter_ptr);
}

static const struct seq_operations gfs2_glock_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glock_seq_show,
};
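
/*
 * Note that gfs2_glock_seq_start() rewinds to the start of the hash
 * table and then steps forward *pos entries, so a full read of the
 * glocks file is quadratic in the number of glocks. That keeps the
 * iterator state trivially small, which is a reasonable trade-off for
 * a debug-only interface.
 */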

static int gfs2_debugfs_open(struct inode *inode, struct file *file)
{
	int ret = seq_open_private(file, &gfs2_glock_seq_ops,
				   sizeof(struct gfs2_glock_iter));
	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		struct gfs2_glock_iter *gi = seq->private;
		gi->sdp = inode->i_private;
	}
	return ret;
}

static const struct file_operations gfs2_debug_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_debugfs_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};

int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
	sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
	if (!sdp->debugfs_dir)
		return -ENOMEM;
	sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
							 S_IFREG | S_IRUGO,
							 sdp->debugfs_dir, sdp,
							 &gfs2_debug_fops);
	if (!sdp->debugfs_dentry_glocks)
		return -ENOMEM;

	return 0;
}
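
/*
 * With debugfs mounted in the conventional location, the file created
 * above appears as /sys/kernel/debug/gfs2/<table_name>/glocks and can
 * simply be read (e.g. with cat) to dump every glock and holder
 * belonging to this filesystem in the format described above.
 */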

void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
{
	if (sdp && sdp->debugfs_dir) {
		if (sdp->debugfs_dentry_glocks) {
			debugfs_remove(sdp->debugfs_dentry_glocks);
			sdp->debugfs_dentry_glocks = NULL;
		}
		debugfs_remove(sdp->debugfs_dir);
		sdp->debugfs_dir = NULL;
	}
}

int gfs2_register_debugfs(void)
{
	gfs2_root = debugfs_create_dir("gfs2", NULL);
	return gfs2_root ? 0 : -ENOMEM;
}

void gfs2_unregister_debugfs(void)
{
	debugfs_remove(gfs2_root);
	gfs2_root = NULL;
}