config INFINIBAND_BNXT_RE
tristate "Broadcom Netxtreme HCA support"
+ depends on 64BIT
depends on ETHERNET && NETDEVICES && PCI && INET && DCB
- depends on MAY_USE_DEVLINK
select NET_VENDOR_BROADCOM
select BNXT
---help---
.read = debugfs_read,
};
- static int setup_debugfs(struct c4iw_dev *devp)
+ static void setup_debugfs(struct c4iw_dev *devp)
{
- if (!devp->debugfs_root)
- return -1;
-
debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
(void *)devp, &qp_debugfs_fops, 4096);
if (c4iw_wr_log)
debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
(void *)devp, &wr_log_debugfs_fops, 4096);
- return 0;
}
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
static int c4iw_rdev_open(struct c4iw_rdev *rdev)
{
int err;
+ unsigned int factor;
c4iw_init_dev_ucontext(rdev, &rdev->uctx);
return -EINVAL;
}
- rdev->qpmask = rdev->lldi.udb_density - 1;
- rdev->cqmask = rdev->lldi.ucq_density - 1;
+ /* This implementation requires a sge_host_page_size <= PAGE_SIZE. */
+ if (rdev->lldi.sge_host_page_size > PAGE_SIZE) {
+ pr_err("%s: unsupported sge host page size %u\n",
+ pci_name(rdev->lldi.pdev),
+ rdev->lldi.sge_host_page_size);
+ return -EINVAL;
+ }
+
+ factor = PAGE_SIZE / rdev->lldi.sge_host_page_size;
+ rdev->qpmask = (rdev->lldi.udb_density * factor) - 1;
+ rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1;
+
pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
- devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
+ devp = ib_alloc_device(c4iw_dev, ibdev);
if (!devp) {
pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
return err;
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
- if (!c4iw_debugfs_root)
- pr_warn("could not create debugfs entry, continuing\n");
reg_workq = create_singlethread_workqueue("Register_iWARP_device");
if (!reg_workq) {
#include <linux/printk.h>
#include <linux/hrtimer.h>
#include <linux/bitmap.h>
+#include <linux/numa.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
- #define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
struct hfi1_ctxtdata *rcd =
container_of(kref, struct hfi1_ctxtdata, kref);
- hfi1_free_ctxtdata(rcd->dd, rcd);
-
spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
rcd->dd->rcd[rcd->ctxt] = NULL;
spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
+ hfi1_free_ctxtdata(rcd->dd, rcd);
+
kfree(rcd);
}
* @rcd: pointer to an initialized rcd data structure
*
* Use this to get a reference after the init.
+ *
+ * Return : reflect kref_get_unless_zero(), which returns non-zero on
+ * increment, otherwise 0.
*/
- void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+ int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
{
- kref_get(&rcd->kref);
+ return kref_get_unless_zero(&rcd->kref);
}
/**
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd[ctxt]) {
rcd = dd->rcd[ctxt];
- hfi1_rcd_get(rcd);
+ if (!hfi1_rcd_get(rcd))
+ rcd = NULL;
}
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
mutex_init(&rcd->exp_mutex);
+ spin_lock_init(&rcd->exp_lock);
+ INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+ INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
+
+ /* Initialize TID flow generations for the context */
+ hfi1_kern_init_ctxt_generations(rcd);
}
*context = rcd;
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
hfi1_rcvctrl(dd, rcvmask, rcd);
sc_enable(rcd->sc);
hfi1_rcd_put(rcd);
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
+ if (!lastfail)
+ lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
dd->unit = ret;
list_add(&dd->list, &hfi1_dev_list);
}
- dd->node = -1;
+ dd->node = NUMA_NO_NODE;
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
idr_preload_end();
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
+ ret = opfn_init();
+ if (ret < 0) {
+ pr_err("Failed to allocate opfn_wq");
+ goto bail_dev;
+ }
+
+ hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
+ opfn_exit();
node_affinity_destroy_all();
hfi1_dbg_exit();
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
if (rcd) {
- hfi1_clear_tids(rcd);
+ hfi1_free_ctxt_rcv_groups(rcd);
hfi1_free_ctxt(rcd);
}
}
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
- depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
- depends on MAY_USE_DEVLINK
select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
else
profile = &vf_rep_profile;
- ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
return -ENOMEM;
ibdev->mdev = dev;
ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
MLX5_CAP_GEN(dev, num_vhca_ports));
- if (!__mlx5_ib_add(ibdev, profile))
+ if (!__mlx5_ib_add(ibdev, profile)) {
+ ib_dealloc_device(&ibdev->ib_dev);
return -EINVAL;
+ }
rep->rep_if[REP_IB].priv = ibdev;
void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vports = MLX5_TOTAL_VPORTS(mdev);
struct mlx5_eswitch_rep_if rep_if = {};
- int vport;
- for (vport = 0; vport < total_vports; vport++) {
- rep_if.load = mlx5_ib_vport_rep_load;
- rep_if.unload = mlx5_ib_vport_rep_unload;
- rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
- mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
- }
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+
+ mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB);
}
void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vports = MLX5_TOTAL_VPORTS(mdev);
- int vport;
- for (vport = total_vports - 1; vport >= 0; vport--)
- mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
if (pool_size <= 0)
goto err;
ret = -ENOMEM;
- pool = kzalloc(sizeof(struct srp_fr_pool) +
- pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
+ pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL);
if (!pool)
goto err;
pool->size = pool_size;
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = dev->dev;
- dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
- unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+ dma_addr_t dma_addr = sg_dma_address(sg);
+ unsigned int dma_len = sg_dma_len(sg);
unsigned int len = 0;
int ret;
int count)
{
struct srp_target_port *target = ch->target;
- struct srp_device *dev = target->srp_host->srp_dev;
struct scatterlist *sg;
int i;
for_each_sg(scat, sg, count, i) {
- srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
- ib_sg_dma_len(dev->dev, sg),
+ srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
target->global_rkey);
}
buf->len = cpu_to_be32(data_len);
WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
for_each_sg(scat, sg, count, i) {
- sge[i].addr = ib_sg_dma_address(ibdev, sg);
- sge[i].length = ib_sg_dma_len(ibdev, sg);
+ sge[i].addr = sg_dma_address(sg);
+ sge[i].length = sg_dma_len(sg);
sge[i].lkey = target->lkey;
}
req->cmd->num_sge += count;
struct srp_direct_buf *buf;
buf = (void *)cmd->add_data + cmd->add_cdb_len;
- buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
+ buf->va = cpu_to_be64(sg_dma_address(scat));
buf->key = cpu_to_be32(target->global_rkey);
- buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
+ buf->len = cpu_to_be32(sg_dma_len(scat));
req->nmdesc = 0;
goto map_complete;
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch;
- int i, j;
u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
if (status)
return FAILED;
- for (i = 0; i < target->ch_count; i++) {
- ch = &target->ch[i];
- for (j = 0; j < target->req_ring_size; ++j) {
- struct srp_request *req = &ch->req_ring[j];
-
- srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
- }
- }
-
return SUCCESS;
}
target_host->max_id = 1;
target_host->max_lun = -1LL;
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
+ target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
target = host_to_target(target_host);
struct srp_device *srp_dev;
struct ib_device_attr *attr = &device->attrs;
struct srp_host *host;
- int mr_page_shift, p;
+ int mr_page_shift;
+ unsigned int p;
u64 max_pages_per_mr;
unsigned int flags = 0;
WARN_ON_ONCE(srp_dev->global_rkey == 0);
}
- for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
+ rdma_for_each_port (device, p) {
host = srp_add_port(srp_dev, p);
if (host)
list_add_tail(&host->list, &srp_dev->dev_list);
*/
/*
- * shift for keeping value range suitable for 32-bit integer arithmetics
+ * shift for keeping value range suitable for 32-bit integer arithmetic
*/
#define LIMIT_SHIFT 8
unsigned int pages = DIV_ROUND_UP(vb->planes[0].length, CIO2_PAGE_SIZE);
unsigned int lops = DIV_ROUND_UP(pages + 1, entries_per_page);
struct sg_table *sg;
- struct sg_page_iter sg_iter;
+ struct sg_dma_page_iter sg_iter;
int i, j;
if (lops <= 0 || lops > CIO2_MAX_LOPS) {
b->offset = sg->sgl->offset;
i = j = 0;
- for_each_sg_page(sg->sgl, &sg_iter, sg->nents, 0) {
+ for_each_sg_dma_page (sg->sgl, &sg_iter, sg->nents, 0) {
if (!pages--)
break;
b->lop[i][j] = sg_page_iter_dma_address(&sg_iter) >> PAGE_SHIFT;
/* Register notifier for subdevices we care */
r = cio2_notifier_init(cio2);
- if (r)
+ /* Proceed without sensors connected to allow the device to suspend. */
+ if (r && r != -ENODEV)
goto fail_cio2_queue_exit;
r = devm_request_irq(&pci_dev->dev, pci_dev->irq, cio2_irq,
MODULE_AUTHOR("Tuukka Toivonen <tuukka.toivonen@intel.com>");
MODULE_AUTHOR("Tianshu Qiu <tian.shu.qiu@intel.com>");
-MODULE_AUTHOR("Jian Xu Zheng <jian.xu.zheng@intel.com>");
+MODULE_AUTHOR("Jian Xu Zheng");
MODULE_AUTHOR("Yuning Pu <yuning.pu@intel.com>");
MODULE_AUTHOR("Yong Zhi <yong.zhi@intel.com>");
MODULE_LICENSE("GPL v2");
static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
- int nr_pages, bool try_lock)
+ int nr_pages)
{
if (!mm || !nr_pages || !scif_ulimit_check)
return 0;
- if (try_lock) {
- if (!down_write_trylock(&mm->mmap_sem)) {
- dev_err(scif_info.mdev.this_device,
- "%s %d err\n", __func__, __LINE__);
- return -1;
- }
- } else {
- down_write(&mm->mmap_sem);
- }
- mm->pinned_vm -= nr_pages;
- up_write(&mm->mmap_sem);
+
+ atomic64_sub(nr_pages, &mm->pinned_vm);
return 0;
}
if (!mm || !nr_pages || !scif_ulimit_check)
return 0;
- locked = nr_pages;
- locked += mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ locked = atomic64_add_return(nr_pages, &mm->pinned_vm);
+
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ atomic64_sub(nr_pages, &mm->pinned_vm);
dev_err(scif_info.mdev.this_device,
"locked(%lu) > lock_limit(%lu)\n",
locked, lock_limit);
return -ENOMEM;
}
- mm->pinned_vm = locked;
return 0;
}
might_sleep();
if (!window->temp && window->mm) {
- __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+ __scif_dec_pinned_vm_lock(window->mm, window->nr_pages);
__scif_release_mm(window->mm);
window->mm = NULL;
}
{
window->unreg_state = OP_IN_PROGRESS;
send_msg = true;
- /* fall through */
}
+ /* fall through */
case OP_IN_PROGRESS:
{
scif_get_window(window, 1);
ep->rma_info.dma_chan);
} else {
if (!__scif_dec_pinned_vm_lock(window->mm,
- window->nr_pages, 1)) {
+ window->nr_pages)) {
__scif_release_mm(window->mm);
window->mm = NULL;
}
prot |= SCIF_PROT_WRITE;
retry:
mm = current->mm;
- down_write(&mm->mmap_sem);
if (ulimit) {
err = __scif_check_inc_pinned_vm(mm, nr_pages);
if (err) {
- up_write(&mm->mmap_sem);
pinned_pages->nr_pages = 0;
goto error_unmap;
}
}
- pinned_pages->nr_pages = get_user_pages(
+ pinned_pages->nr_pages = get_user_pages_fast(
(u64)addr,
nr_pages,
(prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
- pinned_pages->pages,
- NULL);
- up_write(&mm->mmap_sem);
+ pinned_pages->pages);
if (nr_pages != pinned_pages->nr_pages) {
if (try_upgrade) {
if (ulimit)
- __scif_dec_pinned_vm_lock(mm,
- nr_pages, 0);
+ __scif_dec_pinned_vm_lock(mm, nr_pages);
/* Roll back any pinned pages */
for (i = 0; i < pinned_pages->nr_pages; i++) {
if (pinned_pages->pages[i])
return err;
dec_pinned:
if (ulimit)
- __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+ __scif_dec_pinned_vm_lock(mm, nr_pages);
/* Something went wrong! Rollback */
error_unmap:
pinned_pages->nr_pages = nr_pages;
void *set_hca_cap;
void *set_ctx;
int set_sz;
+ bool do_set = false;
int err;
- if (!MLX5_CAP_GEN(dev, pg))
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
+ !MLX5_CAP_GEN(dev, pg))
return 0;
err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
if (err)
return err;
- if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) ||
- MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) ||
- MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)))
- return 0;
-
set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
set_ctx = kzalloc(set_sz, GFP_KERNEL);
if (!set_ctx)
memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
MLX5_ST_SZ_BYTES(odp_cap));
- /* set ODP SRQ support for RC/UD and XRC transports */
- MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive,
- MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive));
-
- MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive,
- MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive));
-
- MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive,
- MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive));
-
- err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+ #define ODP_CAP_SET_MAX(dev, field) \
+ do { \
+ u32 _res = MLX5_CAP_ODP_MAX(dev, field); \
+ if (_res) { \
+ do_set = true; \
+ MLX5_SET(odp_cap, set_hca_cap, field, _res); \
+ } \
+ } while (0)
+
+ ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
+ ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
+
+ if (do_set)
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_ODP);
kfree(set_ctx);
+
return err;
}
return err;
}
+ static int set_hca_cap(struct mlx5_core_dev *dev)
+ {
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ err = handle_hca_cap(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap failed\n");
+ goto out;
+ }
+
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto out;
+ }
+
+ err = handle_hca_cap_odp(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+ goto out;
+ }
+
+ out:
+ return err;
+ }
+
static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
struct mlx5_reg_host_endianness he_in;
goto reclaim_boot_pages;
}
- err = handle_hca_cap(dev);
- if (err) {
- dev_err(&pdev->dev, "handle_hca_cap failed\n");
- goto reclaim_boot_pages;
- }
-
- err = handle_hca_cap_atomic(dev);
+ err = set_hca_cap(dev);
if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
- goto reclaim_boot_pages;
- }
-
- err = handle_hca_cap_odp(dev);
- if (err) {
- dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+ dev_err(&pdev->dev, "set_hca_cap failed\n");
goto reclaim_boot_pages;
}
{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */
{ PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */
{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
+ { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */
+ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ 0, }
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
- SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+ SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
};
static void smaps_account(struct mem_size_stats *mss, struct page *page,
- bool compound, bool young, bool dirty)
+ bool compound, bool young, bool dirty, bool locked)
{
int i, nr = compound ? 1 << compound_order(page) : 1;
unsigned long size = nr * PAGE_SIZE;
else
mss->private_clean += size;
mss->pss += (u64)size << PSS_SHIFT;
+ if (locked)
+ mss->pss_locked += (u64)size << PSS_SHIFT;
return;
}
for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page);
+ unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
if (mapcount >= 2) {
if (dirty || PageDirty(page))
mss->shared_dirty += PAGE_SIZE;
else
mss->shared_clean += PAGE_SIZE;
- mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+ mss->pss += pss / mapcount;
+ if (locked)
+ mss->pss_locked += pss / mapcount;
} else {
if (dirty || PageDirty(page))
mss->private_dirty += PAGE_SIZE;
else
mss->private_clean += PAGE_SIZE;
- mss->pss += PAGE_SIZE << PSS_SHIFT;
+ mss->pss += pss;
+ if (locked)
+ mss->pss_locked += pss;
}
}
}
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
if (pte_present(*pte)) {
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
+ smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
+ bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page;
/* FOLL_DUMP will return -EFAULT on huge zero page */
/* pass */;
else
VM_BUG_ON_PAGE(1, page);
- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
}
}
#endif
-
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
- if (vma->vm_flags & VM_LOCKED)
- mss->pss_locked += mss->pss;
}
#define SEQ_PUT_DEC(str, val) \
pte_t ptent = *pte;
if (pte_present(ptent)) {
- ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
- ptent = pte_wrprotect(ptent);
+ pte_t old_pte;
+
+ old_pte = ptep_modify_prot_start(vma, addr, pte);
+ ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
- ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
+ ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
} else if (is_swap_pte(ptent)) {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
enum mlx5_dev_event {
MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */
+ MLX5_DEV_EVENT_PORT_AFFINITY = 129,
};
enum mlx5_port_status {
enum {
MLX5_MKEY_MR = 1,
MLX5_MKEY_MW,
+ MLX5_MKEY_INDIRECT_DEVX,
};
struct mlx5_core_mkey {
struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz);
- #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
- u32 wq_num, u8 type, int error);
- #endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager);
}
+static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
+{
+ return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
+}
+
#define MLX5_HOST_PF_MAX_VFS (127u)
static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
{
#endif
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
-typedef int vm_fault_t;
struct address_space;
struct mem_cgroup;
struct { /* Page cache and anonymous pages */
/**
* @lru: Pageout list, eg. active_list protected by
- * zone_lru_lock. Sometimes used as a generic list
+ * pgdat->lru_lock. Sometimes used as a generic list
* by the page owner.
*/
struct list_head lru;
*/
unsigned long private;
};
+ struct { /* page_pool used by netstack */
+ /**
+ * @dma_addr: might require a 64-bit value even on
+ * 32-bit architectures.
+ */
+ dma_addr_t dma_addr;
+ };
struct { /* slab, slob and slub */
union {
struct list_head slab_list; /* uses lru */
unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */
- unsigned long pinned_vm; /* Refcount permanently increased */
+ atomic64_t pinned_vm; /* Refcount permanently increased */
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
unsigned long stack_vm; /* VM_STACK */
struct vm_fault;
+/**
+ * typedef vm_fault_t - Return type for page fault handlers.
+ *
+ * Page fault handlers return a bitmask of %VM_FAULT values.
+ */
+typedef __bitwise unsigned int vm_fault_t;
+
+/**
+ * enum vm_fault_reason - Page fault handlers return a bitmask of
+ * these values to tell the core VM what happened when handling the
+ * fault. Used to decide whether a process gets delivered SIGBUS or
+ * just gets major/minor fault counters bumped up.
+ *
+ * @VM_FAULT_OOM: Out Of Memory
+ * @VM_FAULT_SIGBUS: Bad access
+ * @VM_FAULT_MAJOR: Page read from storage
+ * @VM_FAULT_WRITE: Special case for get_user_pages
+ * @VM_FAULT_HWPOISON: Hit poisoned small page
+ * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded
+ * in upper bits
+ * @VM_FAULT_SIGSEGV: segmentation fault
+ * @VM_FAULT_NOPAGE: ->fault installed the pte, not return page
+ * @VM_FAULT_LOCKED: ->fault locked the returned page
+ * @VM_FAULT_RETRY: ->fault blocked, must retry
+ * @VM_FAULT_FALLBACK: huge page fault failed, fall back to small
+ * @VM_FAULT_DONE_COW: ->fault has fully handled COW
+ * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs
+ * fsync() to complete (for synchronous page faults
+ * in DAX)
+ * @VM_FAULT_HINDEX_MASK: mask HINDEX value
+ *
+ */
+enum vm_fault_reason {
+ VM_FAULT_OOM = (__force vm_fault_t)0x000001,
+ VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002,
+ VM_FAULT_MAJOR = (__force vm_fault_t)0x000004,
+ VM_FAULT_WRITE = (__force vm_fault_t)0x000008,
+ VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010,
+ VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
+ VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040,
+ VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100,
+ VM_FAULT_LOCKED = (__force vm_fault_t)0x000200,
+ VM_FAULT_RETRY = (__force vm_fault_t)0x000400,
+ VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800,
+ VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
+ VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
+ VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
+};
+
+/* Encode hstate index for a hwpoisoned large page */
+#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
+#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf)
+
+#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \
+ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \
+ VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)
+
+#define VM_FAULT_RESULT_TRACE \
+ { VM_FAULT_OOM, "OOM" }, \
+ { VM_FAULT_SIGBUS, "SIGBUS" }, \
+ { VM_FAULT_MAJOR, "MAJOR" }, \
+ { VM_FAULT_WRITE, "WRITE" }, \
+ { VM_FAULT_HWPOISON, "HWPOISON" }, \
+ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \
+ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \
+ { VM_FAULT_NOPAGE, "NOPAGE" }, \
+ { VM_FAULT_LOCKED, "LOCKED" }, \
+ { VM_FAULT_RETRY, "RETRY" }, \
+ { VM_FAULT_FALLBACK, "FALLBACK" }, \
+ { VM_FAULT_DONE_COW, "DONE_COW" }, \
+ { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }
+
struct vm_special_mapping {
const char *name; /* The name, e.g. "[vdso]". */
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance events core code:
*
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- * For licensing details see kernel-base/COPYING
*/
#include <linux/fs.h>
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
+static atomic_t nr_ksymbol_events __read_mostly;
+static atomic_t nr_bpf_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static void get_ctx(struct perf_event_context *ctx)
{
- WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
+ refcount_inc(&ctx->refcount);
}
static void free_ctx(struct rcu_head *head)
static void put_ctx(struct perf_event_context *ctx)
{
- if (atomic_dec_and_test(&ctx->refcount)) {
+ if (refcount_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
put_ctx(ctx->parent_ctx);
if (ctx->task && ctx->task != TASK_TOMBSTONE)
* perf_event_context::lock
* perf_event::mmap_mutex
* mmap_sem
+ * perf_addr_filters_head::lock
*
* cpu_hotplug_lock
* pmus_lock
again:
rcu_read_lock();
ctx = READ_ONCE(event->ctx);
- if (!atomic_inc_not_zero(&ctx->refcount)) {
+ if (!refcount_inc_not_zero(&ctx->refcount)) {
rcu_read_unlock();
goto again;
}
}
if (ctx->task == TASK_TOMBSTONE ||
- !atomic_inc_not_zero(&ctx->refcount)) {
+ !refcount_inc_not_zero(&ctx->refcount)) {
raw_spin_unlock(&ctx->lock);
ctx = NULL;
} else {
*
* (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
* we update the addresses of corresponding vmas in
- * event::addr_filters_offs array and bump the event::addr_filters_gen;
+ * event::addr_filter_ranges array and bump the event::addr_filters_gen;
* (p2) when an event is scheduled in (pmu::add), it calls
* perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
* if the generation has changed since the previous call.
INIT_LIST_HEAD(&ctx->event_list);
INIT_LIST_HEAD(&ctx->pinned_active);
INIT_LIST_HEAD(&ctx->flexible_active);
- atomic_set(&ctx->refcount, 1);
+ refcount_set(&ctx->refcount, 1);
}
static struct perf_event_context *
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
attr->comm || attr->comm_exec ||
- attr->task ||
+ attr->task || attr->ksymbol ||
attr->context_switch)
return true;
return false;
dec = true;
if (has_branch_stack(event))
dec = true;
+ if (event->attr.ksymbol)
+ atomic_dec(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_dec(&nr_bpf_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
perf_event_free_bpf_prog(event);
perf_addr_filters_splice(event, NULL);
- kfree(event->addr_filters_offs);
+ kfree(event->addr_filter_ranges);
if (event->destroy)
event->destroy(event);
}
}
+static int perf_event_check_period(struct perf_event *event, u64 value)
+{
+ return event->pmu->check_period(event, value);
+}
+
static int perf_event_period(struct perf_event *event, u64 __user *arg)
{
u64 value;
if (event->attr.freq && value > sysctl_perf_event_sample_rate)
return -EINVAL;
+ if (perf_event_check_period(event, value))
+ return -EINVAL;
+
event_function_call(event, __perf_event_period, &value);
return 0;
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
- if (!atomic_inc_not_zero(&rb->refcount))
+ if (!refcount_inc_not_zero(&rb->refcount))
rb = NULL;
}
rcu_read_unlock();
void ring_buffer_put(struct ring_buffer *rb)
{
- if (!atomic_dec_and_test(&rb->refcount))
+ if (!refcount_dec_and_test(&rb->refcount))
return;
WARN_ON_ONCE(!list_empty(&rb->event_list));
/* now it's safe to free the pages */
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
- vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+ atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
/* this has to be the last one */
rb_free_aux(rb);
- WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+ WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
mutex_unlock(&event->mmap_mutex);
}
*/
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
- vma->vm_mm->pinned_vm -= mmap_locked;
+ atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
free_uid(mmap_user);
out_put:
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
- locked = vma->vm_mm->pinned_vm + extra;
+ locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
!capable(CAP_IPC_LOCK)) {
unlock:
if (!ret) {
atomic_long_add(user_extra, &user->locked_vm);
- vma->vm_mm->pinned_vm += extra;
+ atomic64_add(extra, &vma->vm_mm->pinned_vm);
atomic_inc(&event->mmap_count);
} else if (rb) {
data->phys_addr = perf_virt_to_phys(data->addr);
}
-static __always_inline void
+static __always_inline int
__perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs,
{
struct perf_output_handle handle;
struct perf_event_header header;
+ int err;
/* protect the callchain buffers */
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
- if (output_begin(&handle, event, header.size))
+ err = output_begin(&handle, event, header.size);
+ if (err)
goto exit;
perf_output_sample(&handle, &header, data, event);
exit:
rcu_read_unlock();
+ return err;
}
void
__perf_event_output(event, data, regs, perf_output_begin_backward);
}
-void
+int
perf_event_output(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
- __perf_event_output(event, data, regs, perf_output_begin);
+ return __perf_event_output(event, data, regs, perf_output_begin);
}
/*
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
if (filter->path.dentry) {
- event->addr_filters_offs[count] = 0;
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
restart++;
}
return true;
}
+static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
+ struct vm_area_struct *vma,
+ struct perf_addr_filter_range *fr)
+{
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ struct file *file = vma->vm_file;
+
+ if (!perf_addr_filter_match(filter, file, off, vma_size))
+ return false;
+
+ if (filter->offset < off) {
+ fr->start = vma->vm_start;
+ fr->size = min(vma_size, filter->size - (off - filter->offset));
+ } else {
+ fr->start = vma->vm_start + filter->offset - off;
+ fr->size = min(vma->vm_end - fr->start, filter->size);
+ }
+
+ return true;
+}
+
static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
{
struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
struct vm_area_struct *vma = data;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
- struct file *file = vma->vm_file;
struct perf_addr_filter *filter;
unsigned int restart = 0, count = 0;
+ unsigned long flags;
if (!has_addr_filter(event))
return;
- if (!file)
+ if (!vma->vm_file)
return;
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- if (perf_addr_filter_match(filter, file, off,
- vma->vm_end - vma->vm_start)) {
- event->addr_filters_offs[count] = vma->vm_start;
+ if (perf_addr_filter_vma_adjust(filter, vma,
+ &event->addr_filter_ranges[count]))
restart++;
- }
count++;
}
perf_output_end(&handle);
}
+/*
+ * ksymbol register/unregister tracking
+ */
+
+struct perf_ksymbol_event {
+ const char *name;
+ int name_len;
+ struct {
+ struct perf_event_header header;
+ u64 addr;
+ u32 len;
+ u16 ksym_type;
+ u16 flags;
+ } event_id;
+};
+
+static int perf_event_ksymbol_match(struct perf_event *event)
+{
+ return event->attr.ksymbol;
+}
+
+static void perf_event_ksymbol_output(struct perf_event *event, void *data)
+{
+ struct perf_ksymbol_event *ksymbol_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_ksymbol_match(event))
+ return;
+
+ perf_event_header__init_id(&ksymbol_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ ksymbol_event->event_id.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, ksymbol_event->event_id);
+ __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
+ const char *sym)
+{
+ struct perf_ksymbol_event ksymbol_event;
+ char name[KSYM_NAME_LEN];
+ u16 flags = 0;
+ int name_len;
+
+ if (!atomic_read(&nr_ksymbol_events))
+ return;
+
+ if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
+ ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
+ goto err;
+
+ strlcpy(name, sym, KSYM_NAME_LEN);
+ name_len = strlen(name) + 1;
+ while (!IS_ALIGNED(name_len, sizeof(u64)))
+ name[name_len++] = '\0';
+ BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
+
+ if (unregister)
+ flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
+
+ ksymbol_event = (struct perf_ksymbol_event){
+ .name = name,
+ .name_len = name_len,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = sizeof(ksymbol_event.event_id) +
+ name_len,
+ },
+ .addr = addr,
+ .len = len,
+ .ksym_type = ksym_type,
+ .flags = flags,
+ },
+ };
+
+ perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
+ return;
+err:
+ WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
+}
+
+/*
+ * bpf program load/unload tracking
+ */
+
+struct perf_bpf_event {
+ struct bpf_prog *prog;
+ struct {
+ struct perf_event_header header;
+ u16 type;
+ u16 flags;
+ u32 id;
+ u8 tag[BPF_TAG_SIZE];
+ } event_id;
+};
+
+static int perf_event_bpf_match(struct perf_event *event)
+{
+ return event->attr.bpf_event;
+}
+
+static void perf_event_bpf_output(struct perf_event *event, void *data)
+{
+ struct perf_bpf_event *bpf_event = data;
+ struct perf_output_handle handle;
+ struct perf_sample_data sample;
+ int ret;
+
+ if (!perf_event_bpf_match(event))
+ return;
+
+ perf_event_header__init_id(&bpf_event->event_id.header,
+ &sample, event);
+ ret = perf_output_begin(&handle, event,
+ bpf_event->event_id.header.size);
+ if (ret)
+ return;
+
+ perf_output_put(&handle, bpf_event->event_id);
+ perf_event__output_id_sample(event, &handle, &sample);
+
+ perf_output_end(&handle);
+}
+
+static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
+ enum perf_bpf_event_type type)
+{
+ bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
+ char sym[KSYM_NAME_LEN];
+ int i;
+
+ if (prog->aux->func_cnt == 0) {
+ bpf_get_prog_name(prog, sym);
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)prog->bpf_func,
+ prog->jited_len, unregister, sym);
+ } else {
+ for (i = 0; i < prog->aux->func_cnt; i++) {
+ struct bpf_prog *subprog = prog->aux->func[i];
+
+ bpf_get_prog_name(subprog, sym);
+ perf_event_ksymbol(
+ PERF_RECORD_KSYMBOL_TYPE_BPF,
+ (u64)(unsigned long)subprog->bpf_func,
+ subprog->jited_len, unregister, sym);
+ }
+ }
+}
+
+void perf_event_bpf_event(struct bpf_prog *prog,
+ enum perf_bpf_event_type type,
+ u16 flags)
+{
+ struct perf_bpf_event bpf_event;
+
+ if (type <= PERF_BPF_EVENT_UNKNOWN ||
+ type >= PERF_BPF_EVENT_MAX)
+ return;
+
+ switch (type) {
+ case PERF_BPF_EVENT_PROG_LOAD:
+ case PERF_BPF_EVENT_PROG_UNLOAD:
+ if (atomic_read(&nr_ksymbol_events))
+ perf_event_bpf_emit_ksymbols(prog, type);
+ break;
+ default:
+ break;
+ }
+
+ if (!atomic_read(&nr_bpf_events))
+ return;
+
+ bpf_event = (struct perf_bpf_event){
+ .prog = prog,
+ .event_id = {
+ .header = {
+ .type = PERF_RECORD_BPF_EVENT,
+ .size = sizeof(bpf_event.event_id),
+ },
+ .type = type,
+ .flags = flags,
+ .id = prog->aux->id,
+ },
+ };
+
+ BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
+
+ memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
+ perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
+}
+
void perf_event_itrace_started(struct perf_event *event)
{
event->attach_state |= PERF_ATTACH_ITRACE;
* @filter; if so, adjust filter's address range.
* Called with mm::mmap_sem down for reading.
*/
-static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
- struct mm_struct *mm)
+static void perf_addr_filter_apply(struct perf_addr_filter *filter,
+ struct mm_struct *mm,
+ struct perf_addr_filter_range *fr)
{
struct vm_area_struct *vma;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
- struct file *file = vma->vm_file;
- unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
-
- if (!file)
- continue;
-
- if (!perf_addr_filter_match(filter, file, off, vma_size))
+ if (!vma->vm_file)
continue;
- return vma->vm_start;
+ if (perf_addr_filter_vma_adjust(filter, vma, fr))
+ return;
}
-
- return 0;
}
/*
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- event->addr_filters_offs[count] = 0;
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
/*
* Adjust base offset if the filter is associated to a binary
* that needs to be mapped:
*/
if (filter->path.dentry)
- event->addr_filters_offs[count] =
- perf_addr_filter_apply(filter, mm);
+ perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
count++;
}
return 0;
}
+static int perf_event_nop_int(struct perf_event *event, u64 value)
+{
+ return 0;
+}
+
static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
pmu->pmu_disable = perf_pmu_nop_void;
}
+ if (!pmu->check_period)
+ pmu->check_period = perf_event_nop_int;
+
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;
if (ctx)
perf_event_ctx_unlock(event->group_leader, ctx);
+ if (!ret) {
+ if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
+ event_has_any_exclude_flag(event)) {
+ if (event->destroy)
+ event->destroy(event);
+ ret = -EINVAL;
+ }
+ }
+
if (ret)
module_put(pmu->module);
inc = true;
if (is_cgroup_event(event))
inc = true;
+ if (event->attr.ksymbol)
+ atomic_inc(&nr_ksymbol_events);
+ if (event->attr.bpf_event)
+ atomic_inc(&nr_bpf_events);
if (inc) {
/*
goto err_pmu;
if (has_addr_filter(event)) {
- event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
- sizeof(unsigned long),
- GFP_KERNEL);
- if (!event->addr_filters_offs) {
+ event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
+ sizeof(struct perf_addr_filter_range),
+ GFP_KERNEL);
+ if (!event->addr_filter_ranges) {
err = -ENOMEM;
goto err_per_task;
}
+ /*
+ * Clone the parent's vma offsets: they are valid until exec()
+ * even if the mm is not shared with the parent.
+ */
+ if (event->parent) {
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+
+ raw_spin_lock_irq(&ifh->lock);
+ memcpy(event->addr_filter_ranges,
+ event->parent->addr_filter_ranges,
+ pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
+ raw_spin_unlock_irq(&ifh->lock);
+ }
+
/* force hw sync on the address filters */
event->addr_filters_gen = 1;
}
return event;
err_addr_filters:
- kfree(event->addr_filters_offs);
+ kfree(event->addr_filter_ranges);
err_per_task:
exclusive_event_destroy(event);
again:
rcu_read_lock();
gctx = READ_ONCE(group_leader->ctx);
- if (!atomic_inc_not_zero(&gctx->refcount)) {
+ if (!refcount_inc_not_zero(&gctx->refcount)) {
rcu_read_unlock();
goto again;
}
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
#include <linux/magic.h>
-#include <linux/sched/mm.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
#ifdef CONFIG_THREAD_INFO_IN_TASK
void put_task_stack(struct task_struct *tsk)
{
- if (atomic_dec_and_test(&tsk->stack_refcount))
+ if (refcount_dec_and_test(&tsk->stack_refcount))
release_task_stack(tsk);
}
#endif
* If the task had a separate stack allocation, it should be gone
* by now.
*/
- WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+ WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
#endif
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
static inline void put_signal_struct(struct signal_struct *sig)
{
- if (atomic_dec_and_test(&sig->sigcnt))
+ if (refcount_dec_and_test(&sig->sigcnt))
free_signal_struct(sig);
}
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
- WARN_ON(atomic_read(&tsk->usage));
+ WARN_ON(refcount_read(&tsk->usage));
WARN_ON(tsk == current);
cgroup_free(tsk);
tsk->stack_vm_area = stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
- atomic_set(&tsk->stack_refcount, 1);
+ refcount_set(&tsk->stack_refcount, 1);
#endif
if (err)
* One for us, one for whoever does the "release_task()" (usually
* parent)
*/
- atomic_set(&tsk->usage, 2);
+ refcount_set(&tsk->usage, 2);
#ifdef CONFIG_BLK_DEV_IO_TRACE
tsk->btrace_seq = 0;
#endif
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
- mm->pinned_vm = 0;
+ atomic64_set(&mm->pinned_vm, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
spin_lock_init(&mm->arg_lock);
struct sighand_struct *sig;
if (clone_flags & CLONE_SIGHAND) {
- atomic_inc(¤t->sighand->count);
+ refcount_inc(¤t->sighand->count);
return 0;
}
sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
if (!sig)
return -ENOMEM;
- atomic_set(&sig->count, 1);
+ refcount_set(&sig->count, 1);
spin_lock_irq(¤t->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
spin_unlock_irq(¤t->sighand->siglock);
void __cleanup_sighand(struct sighand_struct *sighand)
{
- if (atomic_dec_and_test(&sighand->count)) {
+ if (refcount_dec_and_test(&sighand->count)) {
signalfd_cleanup(sighand);
/*
* sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
sig->nr_threads = 1;
atomic_set(&sig->live, 1);
- atomic_set(&sig->sigcnt, 1);
+ refcount_set(&sig->sigcnt, 1);
/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
} else {
current->signal->nr_threads++;
atomic_inc(¤t->signal->live);
- atomic_inc(¤t->signal->sigcnt);
+ refcount_inc(¤t->signal->sigcnt);
task_join_group_stop(p);
list_add_tail_rcu(&p->thread_group,
&p->group_leader->thread_group);
return -EINVAL;
}
if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
- if (atomic_read(¤t->sighand->count) > 1)
+ if (refcount_read(¤t->sighand->count) > 1)
return -EINVAL;
}
if (unshare_flags & CLONE_VM) {
void __dump_page(struct page *page, const char *reason)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping;
bool page_poisoned = PagePoisoned(page);
int mapcount;
goto hex_only;
}
+ mapping = page_mapping(page);
+
/*
* Avoid VM_BUG_ON() in page_mapcount().
* page->_mapcount space in struct page is used by sl[aou]b pages to
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
- "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
+ "pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
"start_brk %lx brk %lx start_stack %lx\n"
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
mm_pgtables_bytes(mm),
mm->map_count,
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
- mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
+ atomic64_read(&mm->pinned_vm),
+ mm->data_vm, mm->exec_vm, mm->stack_vm,
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
mm->start_brk, mm->brk, mm->start_stack,
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
u8 ricpc_protocol_major;
u8 ricpc_protocol_minor;
__be16 ricpc_protocol_minor_mask; /* bitmask */
- __be32 ricpc_reserved1;
+ u8 ricpc_dp_toss;
+ u8 ripc_reserved1;
+ __be16 ripc_reserved2;
__be64 ricpc_ack_seq;
__be32 ricpc_credit; /* non-zero enables flow ctl */
};
unsigned int i;
for_each_sg(sglist, sg, sg_dma_len, i) {
- ib_dma_sync_single_for_cpu(dev,
- ib_sg_dma_address(dev, sg),
- ib_sg_dma_len(dev, sg),
- direction);
+ ib_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+ sg_dma_len(sg), direction);
}
}
#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
unsigned int i;
for_each_sg(sglist, sg, sg_dma_len, i) {
- ib_dma_sync_single_for_device(dev,
- ib_sg_dma_address(dev, sg),
- ib_sg_dma_len(dev, sg),
- direction);
+ ib_dma_sync_single_for_device(dev, sg_dma_address(sg),
+ sg_dma_len(sg), direction);
}
}
#define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device
sge->length = sizeof(struct rds_header);
sge = &recv->r_sge[1];
- sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
- sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
+ sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+ sge->length = sg_dma_len(&recv->r_frag->f_sg);
ret = 0;
out:
rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
- (long) ib_sg_dma_address(
- ic->i_cm_id->device,
- &recv->r_frag->f_sg));
+ (long)sg_dma_address(&recv->r_frag->f_sg));
/* XXX when can this fail? */
ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
} else {
/* We expect errors as the qp is drained during shutdown */
if (rds_conn_up(conn) || rds_conn_connecting(conn))
- rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
+ rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
- wc->status,
+ conn->c_tos, wc->status,
ib_wc_status_msg(wc->status));
}
/* We expect errors as the qp is drained during shutdown */
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
- rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
- &conn->c_laddr, &conn->c_faddr, wc->status,
+ rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
+ &conn->c_laddr, &conn->c_faddr,
+ conn->c_tos, wc->status,
ib_wc_status_msg(wc->status));
}
}
if (i < work_alloc
&& scat != &rm->data.op_sg[rm->data.op_count]) {
len = min(RDS_FRAG_SIZE,
- ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+ sg_dma_len(scat) - rm->data.op_dmaoff);
send->s_wr.num_sge = 2;
- send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
+ send->s_sge[1].addr = sg_dma_address(scat);
send->s_sge[1].addr += rm->data.op_dmaoff;
send->s_sge[1].length = len;
bytes_sent += len;
rm->data.op_dmaoff += len;
- if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
+ if (rm->data.op_dmaoff == sg_dma_len(scat)) {
scat++;
rm->data.op_dmasg++;
rm->data.op_dmaoff = 0;
}
/* Convert our struct scatterlist to struct ib_sge */
- send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
- send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+ send->s_sge[0].addr = sg_dma_address(op->op_sg);
+ send->s_sge[0].length = sg_dma_len(op->op_sg);
send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
scat != &op->op_sg[op->op_count]; j++) {
- len = ib_sg_dma_len(ic->i_cm_id->device, scat);
- send->s_sge[j].addr =
- ib_sg_dma_address(ic->i_cm_id->device, scat);
+ len = sg_dma_len(scat);
+ send->s_sge[j].addr = sg_dma_address(scat);
send->s_sge[j].length = len;
send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;