Documentation/kref.txt

   1
   2 krefs allow you to add reference counters to your objects.  If you
   3 have objects that are used in multiple places and passed around, and
   4 you don't have refcounts, your code is almost certainly broken.  If
   5 you want refcounts, krefs are the way to go.
   6
   7 To use a kref, add one to your data structures like:
   8
   9 struct my_data
  10 {
  11         .
  12         .
  13         struct kref refcount;
  14         .
  15         .
  16 };
  17
  18 The kref can occur anywhere within the data structure.
  19
   20 You must initialize the kref after you allocate it.  To do this, call
   21 kref_init() like so:
  22
  23      struct my_data *data;
  24
  25      data = kmalloc(sizeof(*data), GFP_KERNEL);
  26      if (!data)
  27             return -ENOMEM;
  28      kref_init(&data->refcount);
  29
  30 This sets the refcount in the kref to 1.
  31
   32 Once you have an initialized kref, you must obey the following
   33 rules:
  34
  35 1) If you make a non-temporary copy of a pointer, especially if
  36    it can be passed to another thread of execution, you must
  37    increment the refcount with kref_get() before passing it off:
  38        kref_get(&data->refcount);
  39    If you already have a valid pointer to a kref-ed structure (the
  40    refcount cannot go to zero) you may do this without a lock.
  41
  42 2) When you are done with a pointer, you must call kref_put():
  43        kref_put(&data->refcount, data_release);
  44    If this is the last reference to the pointer, the release
  45    routine will be called.  If the code never tries to get
  46    a valid pointer to a kref-ed structure without already
  47    holding a valid pointer, it is safe to do this without
  48    a lock.
  49
   50 3) If the code attempts to gain a reference to a kref-ed structure
   51    without already holding a valid pointer, it must serialize access
   52    such that a kref_put() cannot occur during the kref_get(), and the
   53    structure must remain valid during the kref_get().
  54
  55 For example, if you allocate some data and then pass it to another
  56 thread to process:
  57
  58 void data_release(struct kref *ref)
  59 {
  60         struct my_data *data = container_of(ref, struct my_data, refcount);
  61         kfree(data);
  62 }
  63
  64 void more_data_handling(void *cb_data)
  65 {
  66         struct my_data *data = cb_data;
  67         .
  68         . do stuff with data here
  69         .
  70         kref_put(&data->refcount, data_release);
  71 }
  72
  73 int my_data_handler(void)
  74 {
  75         int rv = 0;
  76         struct my_data *data;
  77         struct task_struct *task;
  78         data = kmalloc(sizeof(*data), GFP_KERNEL);
  79         if (!data)
  80                 return -ENOMEM;
  81         kref_init(&data->refcount);
  82
  83         kref_get(&data->refcount);
  84         task = kthread_run(more_data_handling, data, "more_data_handling");
  85         if (task == ERR_PTR(-ENOMEM)) {
  86                 rv = -ENOMEM;
  87                 kref_put(&data->refcount, data_release);
  88                 goto out;
  89         }
  90
  91         .
  92         . do stuff with data here
  93         .
  94  out:
  95         kref_put(&data->refcount, data_release);
  96         return rv;
  97 }
  98
   99 This way, it doesn't matter in what order the two threads handle the
  100 data; the kref_put() takes care of knowing when the data is no longer
  101 referenced and releasing it.  The kref_get() does not require a lock,
  102 since we already have a valid pointer that we own a refcount for.  The
  103 put needs no lock because nothing tries to get the data without
  104 already holding a pointer.
 105
 106 Note that the "before" in rule 1 is very important.  You should never
 107 do something like:
 108
 109         task = kthread_run(more_data_handling, data, "more_data_handling");
 110         if (task == ERR_PTR(-ENOMEM)) {
 111                 rv = -ENOMEM;
 112                 goto out;
 113         } else
 114                 /* BAD BAD BAD - get is after the handoff */
 115                 kref_get(&data->refcount);
 116
 117 Don't assume you know what you are doing and use the above construct.
 118 First of all, you may not know what you are doing.  Second, you may
 119 know what you are doing (there are some situations where locking is
 120 involved where the above may be legal) but someone else who doesn't
 121 know what they are doing may change the code or copy the code.  It's
 122 bad style.  Don't do it.
 123
  124 There are some situations where you can optimize the gets and puts.
  125 For instance, if you are done with an object and are enqueuing it for
  126 something else or passing it off to something else, there is no reason
  127 to do a get then a put:
 128
 129         /* Silly extra get and put */
 130         kref_get(&obj->ref);
 131         enqueue(obj);
 132         kref_put(&obj->ref, obj_cleanup);
 133
 134 Just do the enqueue.  A comment about this is always welcome:
 135
 136         enqueue(obj);
 137         /* We are done with obj, so we pass our refcount off
 138            to the queue.  DON'T TOUCH obj AFTER HERE! */
 139
 140 The last rule (rule 3) is the nastiest one to handle.  Say, for
 141 instance, you have a list of items that are each kref-ed, and you wish
 142 to get the first one.  You can't just pull the first item off the list
 143 and kref_get() it.  That violates rule 3 because you are not already
 144 holding a valid pointer.  You must add a mutex (or some other lock).
 145 For instance:
 146
 147 static DEFINE_MUTEX(mutex);
 148 static LIST_HEAD(q);
 149 struct my_data
 150 {
 151         struct kref      refcount;
 152         struct list_head link;
 153 };
 154
 155 static struct my_data *get_entry()
 156 {
 157         struct my_data *entry = NULL;
 158         mutex_lock(&mutex);
 159         if (!list_empty(&q)) {
 160                 entry = container_of(q.next, struct my_data, link);
 161                 kref_get(&entry->refcount);
 162         }
 163         mutex_unlock(&mutex);
 164         return entry;
 165 }
 166
 167 static void release_entry(struct kref *ref)
 168 {
 169         struct my_data *entry = container_of(ref, struct my_data, refcount);
 170
 171         list_del(&entry->link);
 172         kfree(entry);
 173 }
 174
 175 static void put_entry(struct my_data *entry)
 176 {
 177         mutex_lock(&mutex);
 178         kref_put(&entry->refcount, release_entry);
 179         mutex_unlock(&mutex);
 180 }
 181
 182 The kref_put() return value is useful if you do not want to hold the
 183 lock during the whole release operation.  Say you didn't want to call
 184 kfree() with the lock held in the example above (since it is kind of
 185 pointless to do so).  You could use kref_put() as follows:
 186
 187 static void release_entry(struct kref *ref)
 188 {
 189         /* All work is done after the return from kref_put(). */
 190 }
 191
 192 static void put_entry(struct my_data *entry)
 193 {
 194         mutex_lock(&mutex);
 195         if (kref_put(&entry->refcount, release_entry)) {
 196                 list_del(&entry->link);
 197                 mutex_unlock(&mutex);
 198                 kfree(entry);
 199         } else
 200                 mutex_unlock(&mutex);
 201 }
 202
 203 This is really more useful if you have to call other routines as part
 204 of the free operations that could take a long time or might claim the
 205 same lock.  Note that doing everything in the release routine is still
 206 preferred as it is a little neater.
 207
 208
 209 Corey Minyard <minyard@acm.org>
 210
 211 A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
 212 presentation on krefs, which can be found at:
 213   http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
 214 and:
 215   http://www.kroah.com/linux/talks/ols_2004_kref_talk/
 216
 217
 218 The above example could also be optimized using kref_get_unless_zero() in
 219 the following way:
 220
 221 static struct my_data *get_entry()
 222 {
 223         struct my_data *entry = NULL;
 224         mutex_lock(&mutex);
 225         if (!list_empty(&q)) {
 226                 entry = container_of(q.next, struct my_data, link);
 227                 if (!kref_get_unless_zero(&entry->refcount))
 228                         entry = NULL;
 229         }
 230         mutex_unlock(&mutex);
 231         return entry;
 232 }
 233
 234 static void release_entry(struct kref *ref)
 235 {
 236         struct my_data *entry = container_of(ref, struct my_data, refcount);
 237
 238         mutex_lock(&mutex);
 239         list_del(&entry->link);
 240         mutex_unlock(&mutex);
 241         kfree(entry);
 242 }
 243
 244 static void put_entry(struct my_data *entry)
 245 {
 246         kref_put(&entry->refcount, release_entry);
 247 }
 248
  249 This is useful to remove the mutex lock around kref_put() in put_entry(), but
  250 it's important that kref_get_unless_zero() is enclosed in the same critical
  251 section that finds the entry in the lookup table;
  252 otherwise kref_get_unless_zero() may reference already freed memory.
  253 Note that it is illegal to use kref_get_unless_zero() without checking its
  254 return value. If you are sure (by already having a valid pointer) that
  255 kref_get_unless_zero() will return true, then use kref_get() instead.
 256
  257 The function kref_get_unless_zero() also makes it possible to use RCU
  258 locking for lookups in the above example:
 259
 260 struct my_data
 261 {
 262         struct rcu_head rhead;
 263         .
 264         struct kref refcount;
 265         .
 266         .
 267 };
 268
 269 static struct my_data *get_entry_rcu()
 270 {
 271         struct my_data *entry = NULL;
 272         rcu_read_lock();
 273         if (!list_empty(&q)) {
 274                 entry = container_of(q.next, struct my_data, link);
 275                 if (!kref_get_unless_zero(&entry->refcount))
 276                         entry = NULL;
 277         }
 278         rcu_read_unlock();
 279         return entry;
 280 }
 281
 282 static void release_entry_rcu(struct kref *ref)
 283 {
 284         struct my_data *entry = container_of(ref, struct my_data, refcount);
 285
 286         mutex_lock(&mutex);
 287         list_del_rcu(&entry->link);
 288         mutex_unlock(&mutex);
 289         kfree_rcu(entry, rhead);
 290 }
 291
 292 static void put_entry(struct my_data *entry)
 293 {
 294         kref_put(&entry->refcount, release_entry_rcu);
 295 }
 296
  297 But note that the struct kref member needs to remain in valid memory for an
  298 RCU grace period after release_entry_rcu() was called. That can be accomplished
  299 by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
  300 before calling kfree(), but note that synchronize_rcu() may sleep for a
  301 substantial amount of time.
 302
 303
 304 Thomas Hellstrom <thellstrom@vmware.com>