// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#include "rsrc.h"

#if defined(CONFIG_NET)
struct io_shutdown {
	struct file			*file;
	int				how;
};

struct io_accept {
	struct file			*file;
	struct sockaddr __user		*addr;
	int __user			*addr_len;
	int				flags;
	int				iou_flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_socket {
	struct file			*file;
	int				domain;
	int				type;
	int				protocol;
	int				flags;
	u32				file_slot;
	unsigned long			nofile;
};

struct io_connect {
	struct file			*file;
	struct sockaddr __user		*addr;
	int				addr_len;
	bool				in_progress;
	bool				seen_econnaborted;
};

struct io_bind {
	struct file			*file;
	int				addr_len;
};

struct io_listen {
	struct file			*file;
	int				backlog;
};

struct io_sr_msg {
	struct file			*file;
	union {
		struct compat_msghdr __user	*umsg_compat;
		struct user_msghdr __user	*umsg;
		void __user			*buf;
	};
	int				len;
	unsigned			done_io;
	unsigned			msg_flags;
	unsigned			nr_multishot_loops;
	u16				flags;
	/* initialised and used only by !msg send variants */
	u16				addr_len;
	u16				buf_group;
	void __user			*addr;
	void __user			*msg_control;
	/* used only for send zerocopy */
	struct io_kiocb 		*notif;
};

/*
 * Number of times we'll try and do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

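/*
 * Illustrative userspace counterpart for the shutdown op below (a sketch,
 * not part of this file; assumes liburing):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_shutdown(sqe, sockfd, SHUT_WR);
 *	io_uring_submit(&ring);
 */
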
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))
		return -EINVAL;

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
	struct socket *sock;
	int ret;

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static bool io_net_retry(struct socket *sock, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
}

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;
	}
}

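/*
 * Try to stash the async msghdr in the ctx-wide cache so a later request
 * can reuse the allocation (and any sized iovec hanging off it). If the
 * ring isn't locked, as for io-wq issue, the cache can't be touched and
 * the iovec is freed instead.
 */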
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_msghdr *hdr = req->async_data;
	struct iovec *iov;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);
		return;
	}

	/* Let normal cleanup path reap it if we fail adding to the cache */
	iov = hdr->free_iov;
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		if (iov)
			kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;
	}
}

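/*
 * Get an async msghdr, preferably from the ctx cache. A cached entry may
 * still carry a previously allocated iovec, which KASAN poisoned while it
 * sat in the cache and which must be unpoisoned before reuse.
 */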
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
	if (hdr) {
		if (hdr->free_iov) {
			kasan_mempool_unpoison_object(hdr->free_iov,
				hdr->free_iov_nr * sizeof(struct iovec));
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;
		return hdr;
	}

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;
		return hdr;
	}
	return NULL;
}

/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			     struct iovec *iov)
{
	if (iov) {
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		kmsg->free_iov = iov;
	}
	return 0;
}

static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->done_io = 0;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;
}

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
		return -EFAULT;

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			return -EINVAL;
		} else {
			if (!access_ok(uiov, sizeof(*uiov)))
				return -EFAULT;
			if (__get_user(clen, &uiov->iov_len))
				return -EFAULT;
			if (clen < 0)
				return -EINVAL;
			sr->len = clen;
		}

		return 0;
	}

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct iovec *iov;
	int ret, nr_segs;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
	} else {
		iov = &iomsg->fast_iov;
		nr_segs = 1;
	}

	if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
		return -EFAULT;

	ret = -EFAULT;
	unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
	msg->msg_flags = 0;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			ret = -EINVAL;
			goto ua_end;
		} else {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
				goto ua_end;
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
					ua_end);
			sr->len = iov->iov_len;
		}
		ret = 0;
ua_end:
		user_access_end();
		return ret;
	}

	user_access_end();
	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))
		return ret;

	return io_net_vec_assign(req, iomsg, iov);
}

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		if (unlikely(ret))
			return ret;

		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;
	return ret;
}

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);
}

static int io_send_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret;

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	if (sr->addr) {
		ret = move_addr_to_kernel(sr->addr, sr->addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
			return ret;
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = sr->addr_len;
	}
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))
			return ret;
	}
	return 0;
}

static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg)
{
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;
	if (!is_msg)
		return io_send_setup(req);
	ret = io_sendmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

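/*
 * Illustrative userspace setup for a plain IORING_OP_SENDMSG (a sketch,
 * assumes liburing):
 *
 *	struct msghdr msg = { .msg_iov = iovs, .msg_iovlen = 2 };
 *	io_uring_prep_sendmsg(sqe, sockfd, &msg, 0);
 */
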
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (req->opcode == IORING_OP_SEND) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		sr->addr_len = READ_ONCE(sqe->addr_len);
	} else if (sqe->addr2 || sqe->file_index) {
		return -EINVAL;
	}

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
			return -EINVAL;
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG);
}

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
{
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);
}

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
{
	struct iovec *iov;
	int nbufs;

	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (ret <= 0)
		return 0;
	if (iter_is_ubuf(&kmsg->msg.msg_iter))
		return 1;

	iov = kmsg->free_iov;
	if (!iov)
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
	nbufs = 0;
	do {
		int this_len = min_t(int, iov[nbufs].iov_len, ret);

		nbufs++;
		ret -= this_len;
	} while (ret);

	return nbufs;
}

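/*
 * Worked example for the counting loop above: with three 4k iovecs and
 * ret == 6k, the first iteration consumes 4k and the second the remaining
 * 2k, so two buffers are accounted as used.
 */
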
static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;
	unsigned int cflags;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);
		goto finish;
	}

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
		goto finish;

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);
		return false;
	}

	/* Otherwise stop bundle and use the current result. */
finish:
	io_req_set_res(req, *ret, cflags);
	*ret = IOU_OK;
	return true;
}

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	io_req_msg_cleanup(req, issue_flags);
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int min_ret = 0;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

retry_bundle:
	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = min_not_zero(sr->len, INT_MAX),
			.nr_iovs = 1,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;
		}

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.nr_iovs = 1;
		else
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))
			return ret;

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
		sr->len = arg.out_len;

		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		} else {
			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
				      arg.iovs, ret, arg.out_len);
		}
	}

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just bundle is set, if we do a short send
	 * then we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);
	if (ret < min_ret) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	if (!io_send_finish(req, &ret, kmsg, issue_flags))
		goto retry_bundle;

	io_req_msg_cleanup(req, issue_flags);
	return ret;
}

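/*
 * Illustrative bundle send against the path above (a sketch; assumes the
 * liburing 2.6+ io_uring_prep_send_bundle() helper and a registered buffer
 * ring at group BGID):
 *
 *	io_uring_prep_send_bundle(sqe, sockfd, 0, 0);
 *	sqe->buf_group = BGID;
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 */
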
static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
{
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		int hdr;

		if (unlikely(namelen < 0))
			return -EOVERFLOW;
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
					namelen, &hdr))
			return -EOVERFLOW;
		if (check_add_overflow(hdr, controllen, &hdr))
			return -EOVERFLOW;

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;
		return 0;
	}

	return 0;
}

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
{
	struct user_msghdr msg;
	int ret;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

#ifdef CONFIG_COMPAT
	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		if (unlikely(ret))
			return ret;

		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
		if (unlikely(ret))
			return ret;

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
						cmsg.msg_controllen);
	}
#endif

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	if (unlikely(ret))
		return ret;

	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
	if (unlikely(ret))
		return ret;

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
					msg.msg_controllen);
}

static int io_recvmsg_prep_setup(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;
	int ret;

	kmsg = io_msg_alloc_async(req);
	if (unlikely(!kmsg))
		return -ENOMEM;

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			if (unlikely(ret))
				return ret;
		}
		return 0;
	}

	ret = io_recvmsg_copy_hdr(req, kmsg);
	if (!ret)
		req->flags |= REQ_F_NEED_CLEANUP;
	return ret;
}

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
			IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	sr->done_io = 0;

	if (unlikely(sqe->file_index || sqe->addr2))
		return -EINVAL;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
		return -EINVAL;
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * assign the right buffer group to it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;
	}
	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
			return -EINVAL;
		if (sr->msg_flags & MSG_WAITALL)
			return -EINVAL;
		if (req->opcode == IORING_OP_RECV && sr->len)
			return -EINVAL;
		req->flags |= REQ_F_APOLL_MULTISHOT;
	}
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)
			return -EINVAL;
	}

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
}

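/*
 * Illustrative multishot receive setup (a sketch; assumes liburing 2.3+
 * and a registered provided-buffer group BGID):
 *
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->buf_group = BGID;
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 */
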
/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
				       issue_flags);
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
			goto finish;
	} else {
		cflags |= io_put_kbuf(req, *ret, issue_flags);
	}

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
				return false;
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		}
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;
		else
			*ret = -EAGAIN;
		return true;
	}

	/* Finish the request / stop multishot. */
finish:
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;
	else
		*ret = IOU_OK;
	io_req_msg_cleanup(req, issue_flags);
	return true;
}

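/*
 * For multishot recvmsg, the selected buffer is carved up as:
 *
 *	[struct io_uring_recvmsg_out][name (namelen)][control (controllen)][payload]
 *
 * Point msg_control at the control region and advance *buf/*len so only
 * the payload area is handed to the socket receive.
 */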
static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
				     size_t *len)
{
	unsigned long ubuf = (unsigned long) *buf;
	unsigned long hdr;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
		kmsg->controllen;
	if (*len < hdr)
		return -EFAULT;

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;
	}

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;
	return 0;
}

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;
};

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
{
	int err;
	int copy_len;
	struct io_recvmsg_multishot_hdr hdr;

	if (kmsg->namelen)
		kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;
	if (err < 0)
		return err;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT
	};

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
	else
		copy_len += kmsg->msg.msg_namelen;

	/*
	 *	"fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {
		*finished = true;
		return -EFAULT;
	}

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
			kmsg->controllen + err;
}

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		void __user *buf;
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);
		if (!buf)
			return -ENOBUFS;

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
			if (ret) {
				io_kbuf_recycle(req, issue_flags);
				return ret;
			}
		}

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
	}

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
					   &mshot_finished);
	} else {
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);
	}

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}
			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

	return ret;
}

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.nr_iovs = 1,
			.mode = KBUF_MODE_EXPAND,
		};

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;
		}

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))
			return ret;

		/* special case 1 vec, can be a fast path */
		if (ret == 1) {
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
			goto map_ubuf;
		}
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
				arg.out_len);
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		}
	} else {
		void __user *buf;

		*len = sr->len;
		buf = io_buffer_select(req, len, issue_flags);
		if (!buf)
			return -ENOBUFS;
		sr->buf = buf;
		sr->len = *len;
map_ubuf:
		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	flags = sr->msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;

retry_multishot:
	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;
			goto out_free;
		}
		sr->buf = NULL;
	}

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;
			}

			return -EAGAIN;
		}
		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
		req_set_fail(req);
	}

	if (ret > 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	else
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, ret <= 0, issue_flags))
		goto retry_multishot;

	return ret;
}

void io_send_zc_cleanup(struct io_kiocb *req)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
	if (zc->notif) {
		io_notif_flush(zc->notif);
		zc->notif = NULL;
	}
}

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	zc->done_io = 0;
	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
		return -EINVAL;
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)
		return -EINVAL;

	notif = zc->notif = io_alloc_notif(ctx);
	if (!notif)
		return -ENOMEM;
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.res = 0;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
			return -EINVAL;
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;
		}
	}

	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
		unsigned idx = READ_ONCE(sqe->buf_index);

		if (unlikely(idx >= ctx->nr_user_bufs))
			return -EFAULT;
		idx = array_index_nospec(idx, ctx->nr_user_bufs);
		req->imu = READ_ONCE(ctx->user_bufs[idx]);
		io_req_set_rsrc_node(notif, ctx, 0);
	}

	if (req->opcode == IORING_OP_SEND_ZC) {
		if (READ_ONCE(sqe->__pad3[0]))
			return -EINVAL;
		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
		zc->addr_len = READ_ONCE(sqe->addr_len);
	} else {
		if (unlikely(sqe->addr2 || sqe->file_index))
			return -EINVAL;
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
			return -EINVAL;
	}

	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif
	return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC);
}

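/*
 * Illustrative zero-copy send (a sketch, assumes liburing 2.3+). The op
 * posts its completion CQE and, once the network stack drops its page
 * references, a second CQE flagged IORING_CQE_F_NOTIF:
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
 */
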
static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
{
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);
}

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	int ret = 0;
	struct bvec_iter bi;
	ssize_t copied = 0;
	unsigned long truesize = 0;

	if (!frag)
		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;
	bi.bi_idx = 0;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		copied += v.bv_len;
		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);
	}
	if (bi.bi_size)
		ret = -EMSGSIZE;

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->len += copied;
	skb->truesize += truesize;
	return ret;
}

static int io_send_zc_import(struct io_kiocb *req, struct io_async_msghdr *kmsg)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	int ret;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter, req->imu,
					(u64)(uintptr_t)sr->buf, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter;
	} else {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		if (unlikely(ret))
			return ret;
		ret = io_notif_account_mem(sr->notif, sr->len);
		if (unlikely(ret))
			return ret;
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	}

	return ret;
}

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned msg_flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	if (!zc->done_io) {
		ret = io_send_zc_import(req, kmsg);
		if (unlikely(ret))
			return ret;
	}

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			zc->len -= ret;
			zc->buf += ret;
			zc->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += zc->done_io;
	else if (zc->done_io)
		ret = zc->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	unsigned flags;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))
		return -EOPNOTSUPP;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
		return -EAGAIN;

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;

		if (ret > 0 && io_net_retry(sock, flags)) {
			sr->done_io += ret;
			req->flags |= REQ_F_BL_NO_RECYCLE;
			return -EAGAIN;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	}

	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;

	/*
	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
	 * flushing notif to io_send_zc_cleanup()
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	}
	io_req_set_res(req, ret, IORING_CQE_F_MORE);
	return IOU_OK;
}

void io_sendrecv_fail(struct io_kiocb *req)
{
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (sr->done_io)
		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;
}

#define ACCEPT_FLAGS	(IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
			 IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)
		return -EINVAL;

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
			return -EINVAL;
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
			return -EINVAL;
	}
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	return 0;
}

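/*
 * Illustrative multishot accept (a sketch, assumes liburing 2.2+): one SQE
 * keeps posting a CQE per accepted connection until an error or cancel:
 *
 *	io_uring_prep_multishot_accept(sqe, listenfd, NULL, NULL, 0);
 */
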
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,
	};
	struct file *file;
	unsigned cflags;
	int ret, fd;

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)
		return -EAGAIN;

retry:
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	arg.err = 0;
	arg.is_empty = -1;
	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
			 accept->flags);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
			return ret;
		}
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
						accept->file_slot);
	}

	cflags = 0;
	if (!arg.is_empty)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);
		return IOU_OK;
	}

	if (ret < 0)
		return ret;
	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
			goto retry;
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;
		return -EAGAIN;
	}

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;
}

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
		return -EINVAL;

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	return 0;
}

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;
	struct file *file;
	int ret, fd;

	if (!fixed) {
		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_fixed_fd_install(req, issue_flags, file,
					  sock->file_slot);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);
}

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
				 file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
				goto out;
			connect->seen_econnaborted = true;
		}
		return -EAGAIN;
	}
	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
		return -EINVAL;

	listen->backlog = READ_ONCE(sqe->len);
	return 0;
}

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;
	int ret;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
		return -ENOTSOCK;

	ret = __sys_listen_socket(sock, listen->backlog);
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return 0;
}

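/*
 * Free a cached entry when the ctx netmsg cache is torn down. An iovec
 * still attached to the entry was poisoned by KASAN when it was cached,
 * so unpoison it before freeing.
 */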
void io_netmsg_cache_free(const void *entry)
{
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
				kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);
	}
	kfree(kmsg);
}
#endif