net/mlx5: Fix command interface race in polling mode
authorAlex Vesker <valex@mellanox.com>
Tue, 12 Jun 2018 13:14:31 +0000 (16:14 +0300)
committerSaeed Mahameed <saeedm@mellanox.com>
Tue, 26 Jun 2018 22:26:34 +0000 (15:26 -0700)
The command interface can work in two modes: Events and Polling.
In the general case, each time we invoke a command, a work is
queued to handle it.

When working in events, the interrupt handler completes the
command execution. On the other hand, when working in polling
mode, the work itself completes it.

Due to a bug in the work handler, a command could have been
completed by the interrupt handler, while the work handler
hasn't finished yet, causing the it to complete once again
if the command interface mode was changed from Events to
polling after the interrupt handler was called.

mlx5_unload_one()
        mlx5_stop_eqs()
                // Destroy the EQ before cmd EQ
                ...cmd_work_handler()
                        write_doorbell()
                        --> EVENT_TYPE_CMD
                                mlx5_cmd_comp_handler() // First free
                                        free_ent(cmd, ent->idx)
                                        complete(&ent->done)

        <-- mlx5_stop_eqs //cmd was complete
                // move to polling before destroying the last cmd EQ
                mlx5_cmd_use_polling()
                        cmd->mode = POLL;

                --> cmd_work_handler (continues)
                        if (cmd->mode == POLL)
                                mlx5_cmd_comp_handler() // Double free

The solution is to store the cmd->mode before writing the doorbell.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/cmd.c

index 2db5fe571594db48543ca6a55b26c5d0eecb2719..384c1fa490811ee651919c139b9cd9e724d4ff81 100644 (file)
@@ -807,6 +807,7 @@ static void cmd_work_handler(struct work_struct *work)
        unsigned long flags;
        bool poll_cmd = ent->polling;
        int alloc_ret;
        unsigned long flags;
        bool poll_cmd = ent->polling;
        int alloc_ret;
+       int cmd_mode;
 
        sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
        down(sem);
 
        sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
        down(sem);
@@ -853,6 +854,7 @@ static void cmd_work_handler(struct work_struct *work)
        set_signature(ent, !cmd->checksum_disabled);
        dump_command(dev, ent, 1);
        ent->ts1 = ktime_get_ns();
        set_signature(ent, !cmd->checksum_disabled);
        dump_command(dev, ent, 1);
        ent->ts1 = ktime_get_ns();
+       cmd_mode = cmd->mode;
 
        if (ent->callback)
                schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
 
        if (ent->callback)
                schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
@@ -877,7 +879,7 @@ static void cmd_work_handler(struct work_struct *work)
        iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
        mmiowb();
        /* if not in polling don't use ent after this point */
        iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
        mmiowb();
        /* if not in polling don't use ent after this point */
-       if (cmd->mode == CMD_MODE_POLLING || poll_cmd) {
+       if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
                poll_timeout(ent);
                /* make sure we read the descriptor after ownership is SW */
                rmb();
                poll_timeout(ent);
                /* make sure we read the descriptor after ownership is SW */
                rmb();