1 // SPDX-License-Identifier: GPL-2.0-only
3 * Intel MIC Platform Software Stack (MPSS)
5 * Copyright(c) 2015 Intel Corporation.
10 #include "scif_main.h"
13 * scif_recv_mark: Handle SCIF_MARK request
14 * @msg: Interrupt message
16 * The peer has requested a mark.
18 void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
20 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
24 err = _scif_fence_mark(ep, &mark);
26 msg->uop = SCIF_MARK_NACK;
28 msg->uop = SCIF_MARK_ACK;
29 msg->payload[0] = ep->remote_ep;
30 msg->payload[2] = mark;
31 scif_nodeqp_send(ep->remote_dev, msg);
35 * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
36 * @msg: Interrupt message
38 * The peer has responded to a SCIF_MARK message.
40 void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
42 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
43 struct scif_fence_info *fence_req =
44 (struct scif_fence_info *)msg->payload[1];
46 mutex_lock(&ep->rma_info.rma_lock);
47 if (msg->uop == SCIF_MARK_ACK) {
48 fence_req->state = OP_COMPLETED;
49 fence_req->dma_mark = (int)msg->payload[2];
51 fence_req->state = OP_FAILED;
53 mutex_unlock(&ep->rma_info.rma_lock);
54 complete(&fence_req->comp);
58 * scif_recv_wait: Handle SCIF_WAIT request
59 * @msg: Interrupt message
61 * The peer has requested waiting on a fence.
63 void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
65 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
66 struct scif_remote_fence_info *fence;
69 * Allocate structure for remote fence information and
70 * send a NACK if the allocation failed. The peer will
71 * return ENOMEM upon receiving a NACK.
73 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
75 msg->payload[0] = ep->remote_ep;
76 msg->uop = SCIF_WAIT_NACK;
77 scif_nodeqp_send(ep->remote_dev, msg);
81 /* Prepare the fence request */
82 memcpy(&fence->msg, msg, sizeof(struct scifmsg));
83 INIT_LIST_HEAD(&fence->list);
85 /* Insert to the global remote fence request list */
86 mutex_lock(&scif_info.fencelock);
87 atomic_inc(&ep->rma_info.fence_refcount);
88 list_add_tail(&fence->list, &scif_info.fence);
89 mutex_unlock(&scif_info.fencelock);
91 schedule_work(&scif_info.misc_work);
95 * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
96 * @msg: Interrupt message
98 * The peer has responded to a SCIF_WAIT message.
100 void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
102 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
103 struct scif_fence_info *fence_req =
104 (struct scif_fence_info *)msg->payload[1];
106 mutex_lock(&ep->rma_info.rma_lock);
107 if (msg->uop == SCIF_WAIT_ACK)
108 fence_req->state = OP_COMPLETED;
110 fence_req->state = OP_FAILED;
111 mutex_unlock(&ep->rma_info.rma_lock);
112 complete(&fence_req->comp);
116 * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
117 * @msg: Interrupt message
119 * The peer has requested a signal on a local offset.
121 void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
123 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
126 err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
129 msg->uop = SCIF_SIG_NACK;
131 msg->uop = SCIF_SIG_ACK;
132 msg->payload[0] = ep->remote_ep;
133 scif_nodeqp_send(ep->remote_dev, msg);
137 * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
138 * @msg: Interrupt message
140 * The peer has requested a signal on a remote offset.
142 void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
144 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
147 err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
150 msg->uop = SCIF_SIG_NACK;
152 msg->uop = SCIF_SIG_ACK;
153 msg->payload[0] = ep->remote_ep;
154 scif_nodeqp_send(ep->remote_dev, msg);
158 * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
159 * @msg: Interrupt message
161 * The peer has responded to a signal request.
163 void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
165 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
166 struct scif_fence_info *fence_req =
167 (struct scif_fence_info *)msg->payload[3];
169 mutex_lock(&ep->rma_info.rma_lock);
170 if (msg->uop == SCIF_SIG_ACK)
171 fence_req->state = OP_COMPLETED;
173 fence_req->state = OP_FAILED;
174 mutex_unlock(&ep->rma_info.rma_lock);
175 complete(&fence_req->comp);
178 static inline void *scif_get_local_va(off_t off, struct scif_window *window)
180 struct page **pages = window->pinned_pages->pages;
181 int page_nr = (off - window->offset) >> PAGE_SHIFT;
182 off_t page_off = off & ~PAGE_MASK;
184 return page_address(pages[page_nr]) + page_off;
187 static void scif_prog_signal_cb(void *arg)
189 struct scif_cb_arg *cb_arg = arg;
191 dma_pool_free(cb_arg->ep->remote_dev->signal_pool, cb_arg->status,
192 cb_arg->src_dma_addr);
196 static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
198 struct scif_endpt *ep = (struct scif_endpt *)epd;
199 struct dma_chan *chan = ep->rma_info.dma_chan;
200 struct dma_device *ddev = chan->device;
201 bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
202 struct dma_async_tx_descriptor *tx;
203 struct scif_status *status = NULL;
204 struct scif_cb_arg *cb_arg = NULL;
209 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
212 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
213 __func__, __LINE__, err);
216 cookie = tx->tx_submit(tx);
217 if (dma_submit_error(cookie)) {
219 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
220 __func__, __LINE__, err);
223 dma_async_issue_pending(chan);
226 * For X100 use the status descriptor to write the value to
229 tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
231 status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
235 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
236 __func__, __LINE__, err);
240 status->src_dma_addr = src;
242 src += offsetof(struct scif_status, val);
243 tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
248 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
249 __func__, __LINE__, err);
253 cb_arg = kmalloc(sizeof(*cb_arg), GFP_KERNEL);
258 cb_arg->src_dma_addr = src;
259 cb_arg->status = status;
261 tx->callback = scif_prog_signal_cb;
262 tx->callback_param = cb_arg;
264 cookie = tx->tx_submit(tx);
265 if (dma_submit_error(cookie)) {
267 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
268 __func__, __LINE__, err);
271 dma_async_issue_pending(chan);
275 dma_pool_free(ep->remote_dev->signal_pool, status,
276 src - offsetof(struct scif_status, val));
285 * @epd - Endpoint Descriptor
286 * @offset - registered address to write @val to
287 * @val - Value to be written at @offset
288 * @type - Type of the window.
290 * Arrange to write a value to the registered offset after ensuring that the
291 * offset provided is indeed valid.
293 int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
294 enum scif_window_type type)
296 struct scif_endpt *ep = (struct scif_endpt *)epd;
297 struct scif_window *window = NULL;
298 struct scif_rma_req req;
299 dma_addr_t dst_dma_addr;
302 mutex_lock(&ep->rma_info.rma_lock);
303 req.out_window = &window;
305 req.nr_bytes = sizeof(u64);
306 req.prot = SCIF_PROT_WRITE;
307 req.type = SCIF_WINDOW_SINGLE;
308 if (type == SCIF_WINDOW_SELF)
309 req.head = &ep->rma_info.reg_list;
311 req.head = &ep->rma_info.remote_reg_list;
312 /* Does a valid window exist? */
313 err = scif_query_window(&req);
315 dev_err(scif_info.mdev.this_device,
316 "%s %d err %d\n", __func__, __LINE__, err);
320 if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
323 if (type == SCIF_WINDOW_SELF)
324 dst_virt = scif_get_local_va(offset, window);
327 scif_get_local_va(offset, (struct scif_window *)
328 window->peer_window);
331 dst_dma_addr = __scif_off_to_dma_addr(window, offset);
332 err = _scif_prog_signal(epd, dst_dma_addr, val);
335 mutex_unlock(&ep->rma_info.rma_lock);
339 static int _scif_fence_wait(scif_epd_t epd, int mark)
341 struct scif_endpt *ep = (struct scif_endpt *)epd;
342 dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
345 /* Wait for DMA callback in scif_fence_mark_cb(..) */
346 err = wait_event_interruptible_timeout(ep->rma_info.markwq,
347 dma_async_is_tx_complete(
348 ep->rma_info.dma_chan,
349 cookie, NULL, NULL) ==
351 SCIF_NODE_ALIVE_TIMEOUT);
360 * scif_rma_handle_remote_fences:
362 * This routine services remote fence requests.
364 void scif_rma_handle_remote_fences(void)
366 struct list_head *item, *tmp;
367 struct scif_remote_fence_info *fence;
368 struct scif_endpt *ep;
372 mutex_lock(&scif_info.fencelock);
373 list_for_each_safe(item, tmp, &scif_info.fence) {
374 fence = list_entry(item, struct scif_remote_fence_info,
376 /* Remove fence from global list */
377 list_del(&fence->list);
379 /* Initiate the fence operation */
380 ep = (struct scif_endpt *)fence->msg.payload[0];
381 mark = fence->msg.payload[2];
382 err = _scif_fence_wait(ep, mark);
384 fence->msg.uop = SCIF_WAIT_NACK;
386 fence->msg.uop = SCIF_WAIT_ACK;
387 fence->msg.payload[0] = ep->remote_ep;
388 scif_nodeqp_send(ep->remote_dev, &fence->msg);
390 if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
391 schedule_work(&scif_info.misc_work);
393 mutex_unlock(&scif_info.fencelock);
396 static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
400 struct scif_fence_info *fence_req;
401 struct scif_endpt *ep = (struct scif_endpt *)epd;
403 fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
409 fence_req->state = OP_IN_PROGRESS;
410 init_completion(&fence_req->comp);
414 msg.payload[0] = ep->remote_ep;
415 msg.payload[1] = (u64)fence_req;
416 if (uop == SCIF_WAIT)
417 msg.payload[2] = mark;
418 spin_lock(&ep->lock);
419 if (ep->state == SCIFEP_CONNECTED)
420 err = scif_nodeqp_send(ep->remote_dev, &msg);
423 spin_unlock(&ep->lock);
427 /* Wait for a SCIF_WAIT_(N)ACK message */
428 err = wait_for_completion_timeout(&fence_req->comp,
429 SCIF_NODE_ALIVE_TIMEOUT);
430 if (!err && scifdev_alive(ep))
436 mutex_lock(&ep->rma_info.rma_lock);
438 if (fence_req->state == OP_IN_PROGRESS)
439 fence_req->state = OP_FAILED;
441 if (fence_req->state == OP_FAILED && !err)
443 if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
444 *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
445 mutex_unlock(&ep->rma_info.rma_lock);
453 * scif_send_fence_mark:
454 * @epd: end point descriptor.
455 * @out_mark: Output DMA mark reported by peer.
457 * Send a remote fence mark request.
459 static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
461 return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
465 * scif_send_fence_wait:
466 * @epd: end point descriptor.
467 * @mark: DMA mark to wait for.
469 * Send a remote fence wait request.
471 static int scif_send_fence_wait(scif_epd_t epd, int mark)
473 return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
476 static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
477 struct scif_fence_info *fence_req)
482 /* Wait for a SCIF_SIG_(N)ACK message */
483 err = wait_for_completion_timeout(&fence_req->comp,
484 SCIF_NODE_ALIVE_TIMEOUT);
485 if (!err && scifdev_alive(ep))
492 mutex_lock(&ep->rma_info.rma_lock);
493 if (fence_req->state == OP_IN_PROGRESS)
494 fence_req->state = OP_FAILED;
495 mutex_unlock(&ep->rma_info.rma_lock);
497 if (fence_req->state == OP_FAILED && !err)
503 * scif_send_fence_signal:
504 * @epd - endpoint descriptor
505 * @loff - local offset
506 * @lval - local value to write to loffset
507 * @roff - remote offset
508 * @rval - remote value to write to roffset
511 * Sends a remote fence signal request
513 static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
514 off_t loff, u64 lval, int flags)
518 struct scif_fence_info *fence_req;
519 struct scif_endpt *ep = (struct scif_endpt *)epd;
521 fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
527 fence_req->state = OP_IN_PROGRESS;
528 init_completion(&fence_req->comp);
530 if (flags & SCIF_SIGNAL_LOCAL) {
531 msg.uop = SCIF_SIG_LOCAL;
532 msg.payload[0] = ep->remote_ep;
533 msg.payload[1] = roff;
534 msg.payload[2] = rval;
535 msg.payload[3] = (u64)fence_req;
536 spin_lock(&ep->lock);
537 if (ep->state == SCIFEP_CONNECTED)
538 err = scif_nodeqp_send(ep->remote_dev, &msg);
541 spin_unlock(&ep->lock);
544 err = _scif_send_fence_signal_wait(ep, fence_req);
548 fence_req->state = OP_IN_PROGRESS;
550 if (flags & SCIF_SIGNAL_REMOTE) {
551 msg.uop = SCIF_SIG_REMOTE;
552 msg.payload[0] = ep->remote_ep;
553 msg.payload[1] = loff;
554 msg.payload[2] = lval;
555 msg.payload[3] = (u64)fence_req;
556 spin_lock(&ep->lock);
557 if (ep->state == SCIFEP_CONNECTED)
558 err = scif_nodeqp_send(ep->remote_dev, &msg);
561 spin_unlock(&ep->lock);
564 err = _scif_send_fence_signal_wait(ep, fence_req);
572 static void scif_fence_mark_cb(void *arg)
574 struct scif_endpt *ep = (struct scif_endpt *)arg;
576 wake_up_interruptible(&ep->rma_info.markwq);
577 atomic_dec(&ep->rma_info.fence_refcount);
583 * @epd - endpoint descriptor
584 * Set up a mark for this endpoint and return the value of the mark.
586 int _scif_fence_mark(scif_epd_t epd, int *mark)
588 struct scif_endpt *ep = (struct scif_endpt *)epd;
589 struct dma_chan *chan = ep->rma_info.dma_chan;
590 struct dma_device *ddev = chan->device;
591 struct dma_async_tx_descriptor *tx;
595 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
598 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
599 __func__, __LINE__, err);
602 cookie = tx->tx_submit(tx);
603 if (dma_submit_error(cookie)) {
605 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
606 __func__, __LINE__, err);
609 dma_async_issue_pending(chan);
610 tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
613 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
614 __func__, __LINE__, err);
617 tx->callback = scif_fence_mark_cb;
618 tx->callback_param = ep;
619 *mark = cookie = tx->tx_submit(tx);
620 if (dma_submit_error(cookie)) {
622 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
623 __func__, __LINE__, err);
626 atomic_inc(&ep->rma_info.fence_refcount);
627 dma_async_issue_pending(chan);
631 #define SCIF_LOOPB_MAGIC_MARK 0xdead
633 int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
635 struct scif_endpt *ep = (struct scif_endpt *)epd;
638 dev_dbg(scif_info.mdev.this_device,
639 "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
641 err = scif_verify_epd(ep);
646 if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
649 /* At least one of init self or peer RMA should be set */
650 if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
653 /* Exactly one of init self or peer RMA should be set but not both */
654 if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
658 * Management node loopback does not need to use DMA.
659 * Return a valid mark to be symmetric.
661 if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
662 *mark = SCIF_LOOPB_MAGIC_MARK;
666 if (flags & SCIF_FENCE_INIT_SELF)
667 err = _scif_fence_mark(epd, mark);
669 err = scif_send_fence_mark(ep, mark);
672 dev_err(scif_info.mdev.this_device,
673 "%s %d err %d\n", __func__, __LINE__, err);
674 dev_dbg(scif_info.mdev.this_device,
675 "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
676 ep, flags, *mark, err);
679 EXPORT_SYMBOL_GPL(scif_fence_mark);
681 int scif_fence_wait(scif_epd_t epd, int mark)
683 struct scif_endpt *ep = (struct scif_endpt *)epd;
686 dev_dbg(scif_info.mdev.this_device,
687 "SCIFAPI fence_wait: ep %p mark 0x%x\n",
689 err = scif_verify_epd(ep);
693 * Management node loopback does not need to use DMA.
694 * The only valid mark provided is 0 so simply
695 * return success if the mark is valid.
697 if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
698 if (mark == SCIF_LOOPB_MAGIC_MARK)
703 if (mark & SCIF_REMOTE_FENCE)
704 err = scif_send_fence_wait(epd, mark);
706 err = _scif_fence_wait(epd, mark);
708 dev_err(scif_info.mdev.this_device,
709 "%s %d err %d\n", __func__, __LINE__, err);
712 EXPORT_SYMBOL_GPL(scif_fence_wait);
714 int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
715 off_t roff, u64 rval, int flags)
717 struct scif_endpt *ep = (struct scif_endpt *)epd;
720 dev_dbg(scif_info.mdev.this_device,
721 "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
722 ep, loff, lval, roff, rval, flags);
723 err = scif_verify_epd(ep);
728 if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
729 SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
732 /* At least one of init self or peer RMA should be set */
733 if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
736 /* Exactly one of init self or peer RMA should be set but not both */
737 if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
740 /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
741 if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
744 /* Only Dword offsets allowed */
745 if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
748 /* Only Dword aligned offsets allowed */
749 if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
752 if (flags & SCIF_FENCE_INIT_PEER) {
753 err = scif_send_fence_signal(epd, roff, rval, loff,
756 /* Local Signal in Local RAS */
757 if (flags & SCIF_SIGNAL_LOCAL) {
758 err = scif_prog_signal(epd, loff, lval,
764 /* Signal in Remote RAS */
765 if (flags & SCIF_SIGNAL_REMOTE)
766 err = scif_prog_signal(epd, roff,
767 rval, SCIF_WINDOW_PEER);
771 dev_err(scif_info.mdev.this_device,
772 "%s %d err %d\n", __func__, __LINE__, err);
775 EXPORT_SYMBOL_GPL(scif_fence_signal);