net/mlx5: Introduce blue flame register allocator
author Eli Cohen <eli@mellanox.com>
Tue, 3 Jan 2017 21:55:23 +0000 (23:55 +0200)
committer Leon Romanovsky <leon@kernel.org>
Sun, 8 Jan 2017 09:21:26 +0000 (11:21 +0200)
Add an allocator for blue flame registers. A blue flame register is
used for generating send doorbells. It can generate either a regular
doorbell or a blue flame doorbell, where the data to be sent is written
to the device's I/O memory, saving the device the need to read the data
from host memory. For blue flame doorbells to work, the blue flame
register needs to be mapped as write combining. The user can specify
which kind of send doorbells they wish to use. If a write combining
mapping is requested but cannot be obtained, the allocator falls back
to a non-write-combining mapping and reports that to the user.
Subsequent patches in this series will make use of this allocator.
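
As an illustration (not part of this patch), a consumer such as a send
queue could use the new interface roughly as follows; whether the
write combining mapping succeeded is visible through bfreg.wc:

        /* hypothetical caller, assuming an initialized mlx5_core_dev *mdev */
        struct mlx5_sq_bfreg bfreg;
        int err;

        /* request a write combining, non-fast-path blue flame register */
        err = mlx5_alloc_bfreg(mdev, &bfreg, true, false);
        if (err)
                return err;

        if (!bfreg.wc)
                pr_debug("fell back to a non-write-combining doorbell mapping\n");

        /* ... ring send doorbells through bfreg.map and bfreg.index ... */

        mlx5_free_bfreg(mdev, &bfreg);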

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/uar.c
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h

drivers/net/ethernet/mellanox/mlx5/core/uar.c
index ce7fcebb81a36134253f0bb551e58e2053248a45..6a081a8787a7322c2e3013f9c1d82924b8c10dd3 100644
@@ -231,3 +231,238 @@ void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar)
        mlx5_cmd_free_uar(mdev, uar->index);
 }
 EXPORT_SYMBOL(mlx5_unmap_free_uar);
+
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
+{
+       if (MLX5_CAP_GEN(mdev, uar_4k))
+               return MLX5_CAP_GEN(mdev, num_of_uars_per_page);
+
+       return 1;
+}
+
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
+{
+       u32 system_page_index;
+
+       if (MLX5_CAP_GEN(mdev, uar_4k))
+               system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT);
+       else
+               system_page_index = index;
+
+       return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+}
+
+static void up_rel_func(struct kref *kref)
+{
+       struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
+
+       list_del(&up->list);
+       if (mlx5_cmd_free_uar(up->mdev, up->index))
+               mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
+       kfree(up->reg_bitmap);
+       kfree(up->fp_bitmap);
+       kfree(up);
+}
+
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+                                             bool map_wc)
+{
+       struct mlx5_uars_page *up;
+       int err = -ENOMEM;
+       phys_addr_t pfn;
+       int bfregs;
+       int i;
+
+       bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+       up = kzalloc(sizeof(*up), GFP_KERNEL);
+       if (!up)
+               return ERR_PTR(err);
+
+       up->mdev = mdev;
+       up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+       if (!up->reg_bitmap)
+               goto error1;
+
+       up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+       if (!up->fp_bitmap)
+               goto error1;
+
+       for (i = 0; i < bfregs; i++)
+               if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR)
+                       set_bit(i, up->reg_bitmap);
+               else
+                       set_bit(i, up->fp_bitmap);
+
+       up->bfregs = bfregs;
+       up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+       up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+
+       err = mlx5_cmd_alloc_uar(mdev, &up->index);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
+               goto error1;
+       }
+
+       pfn = uar2pfn(mdev, up->index);
+       if (map_wc) {
+               up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+               if (!up->map) {
+                       err = -EAGAIN;
+                       goto error2;
+               }
+       } else {
+               up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+               if (!up->map) {
+                       err = -ENOMEM;
+                       goto error2;
+               }
+       }
+       kref_init(&up->ref_count);
+       mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+                     up->index, up->bfregs);
+       return up;
+
+error2:
+       if (mlx5_cmd_free_uar(mdev, up->index))
+               mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
+error1:
+       kfree(up->fp_bitmap);
+       kfree(up->reg_bitmap);
+       kfree(up);
+       return ERR_PTR(err);
+}
+
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
+{
+       /* return the offset in bytes from the start of the page to the
+        * blue flame area of the UAR
+        */
+       return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE +
+              (dbi % MLX5_BFREGS_PER_UAR) *
+              (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET;
+}
+
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                      bool map_wc, bool fast_path)
+{
+       struct mlx5_bfreg_data *bfregs;
+       struct mlx5_uars_page *up;
+       struct list_head *head;
+       unsigned long *bitmap;
+       unsigned int *avail;
+       struct mutex *lock;  /* pointer to right mutex */
+       int dbi;
+
+       bfregs = &mdev->priv.bfregs;
+       if (map_wc) {
+               head = &bfregs->wc_head.list;
+               lock = &bfregs->wc_head.lock;
+       } else {
+               head = &bfregs->reg_head.list;
+               lock = &bfregs->reg_head.lock;
+       }
+       mutex_lock(lock);
+       if (list_empty(head)) {
+               up = alloc_uars_page(mdev, map_wc);
+               if (IS_ERR(up)) {
+                       mutex_unlock(lock);
+                       return PTR_ERR(up);
+               }
+               list_add(&up->list, head);
+       } else {
+               up = list_entry(head->next, struct mlx5_uars_page, list);
+               kref_get(&up->ref_count);
+       }
+       if (fast_path) {
+               bitmap = up->fp_bitmap;
+               avail = &up->fp_avail;
+       } else {
+               bitmap = up->reg_bitmap;
+               avail = &up->reg_avail;
+       }
+       dbi = find_first_bit(bitmap, up->bfregs);
+       clear_bit(dbi, bitmap);
+       (*avail)--;
+       if (!(*avail))
+               list_del(&up->list);
+
+       bfreg->map = up->map + map_offset(mdev, dbi);
+       bfreg->up = up;
+       bfreg->wc = map_wc;
+       bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+       mutex_unlock(lock);
+
+       return 0;
+}
+
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                    bool map_wc, bool fast_path)
+{
+       int err;
+
+       err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+       if (!err)
+               return 0;
+
+       if (err == -EAGAIN && map_wc)
+               return alloc_bfreg(mdev, bfreg, false, fast_path);
+
+       return err;
+}
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
+
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+                                          struct mlx5_uars_page *up,
+                                          struct mlx5_sq_bfreg *bfreg)
+{
+       unsigned int uar_idx;
+       unsigned int bfreg_idx;
+       unsigned int bf_reg_size;
+
+       bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
+
+       uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+       bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
+
+       return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
+}
+
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
+{
+       struct mlx5_bfreg_data *bfregs;
+       struct mlx5_uars_page *up;
+       struct mutex *lock; /* pointer to right mutex */
+       unsigned int dbi;
+       bool fp;
+       unsigned int *avail;
+       unsigned long *bitmap;
+       struct list_head *head;
+
+       bfregs = &mdev->priv.bfregs;
+       if (bfreg->wc) {
+               head = &bfregs->wc_head.list;
+               lock = &bfregs->wc_head.lock;
+       } else {
+               head = &bfregs->reg_head.list;
+               lock = &bfregs->reg_head.lock;
+       }
+       up = bfreg->up;
+       dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+       fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+       if (fp) {
+               avail = &up->fp_avail;
+               bitmap = up->fp_bitmap;
+       } else {
+               avail = &up->reg_avail;
+               bitmap = up->reg_bitmap;
+       }
+       mutex_lock(lock);
+       (*avail)++;
+       set_bit(dbi, bitmap);
+       if (*avail == 1)
+               list_add_tail(&up->list, head);
+
+       kref_put(&up->ref_count, up_rel_func);
+       mutex_unlock(lock);
+}
+EXPORT_SYMBOL(mlx5_free_bfreg);
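
A quick worked example of the map_offset() arithmetic above. The
concrete values are assumptions for illustration (a log_bf_reg_size of
9, i.e. 512-byte blue flame registers, MLX5_ADAPTER_PAGE_SIZE of 4096
and MLX5_BF_OFFSET of 0x800); they are not mandated by this patch:

        /* byte offset of doorbell index dbi = 5 within its system page */
        unsigned long off = 5 / 4 * 4096 +  /* second UAR in the system page */
                            5 % 4 * 512 +   /* second bfreg within that UAR  */
                            0x800;          /* start of the blue flame area  */
        /* off == 4096 + 512 + 2048 == 6656 bytes from up->map */
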
include/linux/mlx5/device.h
index aa851c51ab59070fe3f090a8ef8f431cbf1d741e..db1b9287012f5023d1252e1ce35c1e0475f18c73 100644
@@ -215,6 +215,8 @@ enum {
        MLX5_BFREGS_PER_UAR             = 4,
        MLX5_MAX_UARS                   = 1 << 8,
        MLX5_NON_FP_BFREGS_PER_UAR      = 2,
+       MLX5_FP_BFREGS_PER_UAR          = MLX5_BFREGS_PER_UAR -
+                                         MLX5_NON_FP_BFREGS_PER_UAR,
        MLX5_MAX_BFREGS                 = MLX5_MAX_UARS *
                                          MLX5_NON_FP_BFREGS_PER_UAR,
 };
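
With these constants each UAR carries four doorbell registers: the
first two are regular bfregs and the last two are fast-path bfregs,
exactly the split that the bitmap initialization in alloc_uars_page()
and the fp test in mlx5_free_bfreg() rely on. A small illustrative
helper (hypothetical, not part of the patch) expressing that rule:

        static bool dbi_is_fast_path(unsigned int dbi)
        {
                return (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
        }
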
include/linux/mlx5/driver.h
index 3d07e25b3bf1169d22ee4b7b948bbfe932264aaa..969aa1fe17e2395b9a8d4cef5052884a4ac11b32 100644
@@ -452,6 +452,39 @@ struct mlx5_eq_table {
        spinlock_t              lock;
 };
 
+struct mlx5_uars_page {
+       void __iomem           *map;
+       bool                    wc;
+       u32                     index;
+       struct list_head        list;
+       unsigned int            bfregs;
+       unsigned long          *reg_bitmap; /* for non fast path bf regs */
+       unsigned long          *fp_bitmap;
+       unsigned int            reg_avail;
+       unsigned int            fp_avail;
+       struct kref             ref_count;
+       struct mlx5_core_dev   *mdev;
+};
+
+struct mlx5_bfreg_head {
+       /* protect blue flame registers allocations */
+       struct mutex            lock;
+       struct list_head        list;
+};
+
+struct mlx5_bfreg_data {
+       struct mlx5_bfreg_head  reg_head;
+       struct mlx5_bfreg_head  wc_head;
+};
+
+struct mlx5_sq_bfreg {
+       void __iomem           *map;
+       struct mlx5_uars_page  *up;
+       bool                    wc;
+       u32                     index;
+       unsigned int            offset;
+};
+
 struct mlx5_uar {
        u32                     index;
        struct list_head        bf_list;
@@ -645,6 +678,7 @@ struct mlx5_priv {
        void                   *pfault_ctx;
        struct srcu_struct      pfault_srcu;
 #endif
+       struct mlx5_bfreg_data          bfregs;
 };
 
 enum mlx5_device_state {
@@ -1022,6 +1056,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
 int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
 void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                    bool map_wc, bool fast_path);
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
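
Before the first mlx5_alloc_bfreg() call, the list heads and mutexes in
mlx5_bfreg_data must be initialized once per device. That setup is not
part of this patch; a minimal sketch of what it would look like:

        mutex_init(&mdev->priv.bfregs.reg_head.lock);
        INIT_LIST_HEAD(&mdev->priv.bfregs.reg_head.list);
        mutex_init(&mdev->priv.bfregs.wc_head.lock);
        INIT_LIST_HEAD(&mdev->priv.bfregs.wc_head.list);
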
include/linux/mlx5/mlx5_ifc.h
index 15f896781966e12b2dcfc64825f83f5e84068c25..1223feff0ea4fc372f50687a20f730d9a8b51cb6 100644
@@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         uc[0x1];
        u8         rc[0x1];
 
-       u8         reserved_at_240[0xa];
+       u8         uar_4k[0x1];
+       u8         reserved_at_241[0x9];
        u8         uar_sz[0x6];
        u8         reserved_at_250[0x8];
        u8         log_pg_sz[0x8];
@@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         device_frequency_mhz[0x20];
        u8         device_frequency_khz[0x20];
 
-       u8         reserved_at_500[0x80];
+       u8         reserved_at_500[0x20];
+       u8         num_of_uars_per_page[0x20];
+       u8         reserved_at_540[0x40];
 
        u8         reserved_at_580[0x3f];
        u8         cqe_compression[0x1];
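
These capability bits feed uars_per_sys_page() and uar2pfn() above: when
uar_4k is set, several 4KB UARs share one kernel page, so the UAR index
is scaled by PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT to find its system
page. An illustrative calculation, assuming 64KB kernel pages
(PAGE_SHIFT = 16) and the usual MLX5_ADAPTER_PAGE_SHIFT of 12:

        u32 index = 35;                   /* UAR index returned by firmware       */
        u32 syspage = index >> (16 - 12); /* 35 >> 4 == 2: third 64KB system page */
        /* pfn = (pci_resource_start(pdev, 0) >> PAGE_SHIFT) + syspage */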