virtio: fix race in enable_cb
authorChristian Borntraeger <borntraeger@de.ibm.com>
Fri, 14 Mar 2008 13:17:05 +0000 (14:17 +0100)
committerRusty Russell <rusty@rustcorp.com.au>
Mon, 17 Mar 2008 11:58:21 +0000 (22:58 +1100)
There is a race in virtio_net, dealing with disabling/enabling the callback.
I saw the following oops:

kernel BUG at /space/kvm/drivers/virtio/virtio_ring.c:218!
illegal operation: 0001 [#1] SMP
Modules linked in: sunrpc dm_mod
CPU: 2 Not tainted 2.6.25-rc1zlive-host-10623-gd358142-dirty #99
Process swapper (pid: 0, task: 000000000f85a610, ksp: 000000000f873c60)
Krnl PSW : 0404300180000000 00000000002b81a6 (vring_disable_cb+0x16/0x20)
           R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:3 PM:0 EA:3
Krnl GPRS: 0000000000000001 0000000000000001 0000000010005800 0000000000000001
           000000000f3a0900 000000000f85a610 0000000000000000 0000000000000000
           0000000000000000 000000000f870000 0000000000000000 0000000000001237
           000000000f3a0920 000000000010ff74 00000000002846f6 000000000fa0bcd8
Krnl Code: 00000000002b819aa7110001           tmll    %r1,1
           00000000002b819ea7840004           brc     8,2b81a6
           00000000002b81a2a7f40001           brc     15,2b81a4
          >00000000002b81a6a51b0001           oill    %r1,1
           00000000002b81aa40102000           sth     %r1,0(%r2)
           00000000002b81ae: 07fe               bcr     15,%r14
           00000000002b81b0eb7ff0380024       stmg    %r7,%r15,56(%r15)
           00000000002b81b6a7f13e00           tmll    %r15,15872
Call Trace:
([<000000000fa0bcd0>] 0xfa0bcd0)
 [<00000000002b8350>] vring_interrupt+0x5c/0x6c
 [<000000000010ab08>] do_extint+0xb8/0xf0
 [<0000000000110716>] ext_no_vtime+0x16/0x1a
 [<0000000000107e72>] cpu_idle+0x1c2/0x1e0

The problem can be triggered with a high amount of host->guest traffic.
I think its the following race:

poll says netif_rx_complete
poll calls enable_cb
enable_cb opens the interrupt mask
a new packet comes, an interrupt is triggered----\
enable_cb sees that there is more work           |
enable_cb disables the interrupt                 |
       .                                         V
       .                            interrupt is delivered
       .                            skb_recv_done does atomic napi test, ok
 some waiting                       disable_cb is called->check fails->bang!
       .
poll would do napi check
poll would do disable_cb

The fix is to let enable_cb not disable the interrupt again, but expect the
caller to do the cleanup if it returns false. In that case, the interrupt is
only disabled, if the napi test_set_bit was successful.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (cleaned up doco)
drivers/net/virtio_net.c
drivers/virtio/virtio_ring.c
include/linux/virtio.h

index e575df83e5c21f23ff68028437ae79887119d540..b58472cf76f862c16fa861b02bcb494008c00f90 100644 (file)
@@ -203,8 +203,11 @@ again:
        if (received < budget) {
                netif_rx_complete(vi->dev, napi);
                if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
        if (received < budget) {
                netif_rx_complete(vi->dev, napi);
                if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
-                   && netif_rx_reschedule(vi->dev, napi))
+                   && napi_schedule_prep(napi)) {
+                       vi->rvq->vq_ops->disable_cb(vi->rvq);
+                       __netif_rx_schedule(vi->dev, napi);
                        goto again;
                        goto again;
+               }
        }
 
        return received;
        }
 
        return received;
@@ -278,10 +281,11 @@ again:
                pr_debug("%s: virtio not prepared to send\n", dev->name);
                netif_stop_queue(dev);
 
                pr_debug("%s: virtio not prepared to send\n", dev->name);
                netif_stop_queue(dev);
 
-               /* Activate callback for using skbs: if this fails it
+               /* Activate callback for using skbs: if this returns false it
                 * means some were used in the meantime. */
                if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
                 * means some were used in the meantime. */
                if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
-                       printk("Unlikely: restart svq failed\n");
+                       printk("Unlikely: restart svq race\n");
+                       vi->svq->vq_ops->disable_cb(vi->svq);
                        netif_start_queue(dev);
                        goto again;
                }
                        netif_start_queue(dev);
                        goto again;
                }
index 3a28c1382131d04940035448d24aada5244e7589..aa714028641e630b068f683fc67e6707d6c6a853 100644 (file)
@@ -232,7 +232,6 @@ static bool vring_enable_cb(struct virtqueue *_vq)
        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
        mb();
        if (unlikely(more_used(vq))) {
        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
        mb();
        if (unlikely(more_used(vq))) {
-               vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
                END_USE(vq);
                return false;
        }
                END_USE(vq);
                return false;
        }
index 260d1fcf29a4222c011ab39176b1e2e4d46a98a3..12c18ac1b973caf7fc33f44db0ee7fa759fc361f 100644 (file)
@@ -43,8 +43,9 @@ struct virtqueue
  *     vq: the struct virtqueue we're talking about.
  * @enable_cb: restart callbacks after disable_cb.
  *     vq: the struct virtqueue we're talking about.
  *     vq: the struct virtqueue we're talking about.
  * @enable_cb: restart callbacks after disable_cb.
  *     vq: the struct virtqueue we're talking about.
- *     This returns "false" (and doesn't re-enable) if there are pending
- *     buffers in the queue, to avoid a race.
+ *     This re-enables callbacks; it returns "false" if there are pending
+ *     buffers in the queue, to detect a possible race between the driver
+ *     checking for more work, and enabling callbacks.
  *
  * Locking rules are straightforward: the driver is responsible for
  * locking.  No two operations may be invoked simultaneously.
  *
  * Locking rules are straightforward: the driver is responsible for
  * locking.  No two operations may be invoked simultaneously.