#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

#include "liburing.h"
#include "barrier.h"

static int __io_uring_get_cqe(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr, int wait)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned mask = *cq->kring_mask;
	unsigned head;
	int ret;

	*cqe_ptr = NULL;
	head = *cq->khead;
	do {
		/*
		 * It's necessary to use a read_barrier() before reading
		 * the CQ tail, since the kernel updates it locklessly. The
		 * kernel has the matching store barrier for the update. The
		 * kernel also ensures that previous stores to CQEs are ordered
		 * with the tail update.
		 */
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		if (!wait)
			break;
		ret = io_uring_enter(ring->ring_fd, 0, 1,
				     IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return 0;
}
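
/*
 * Note: the head/tail pairing above can also be expressed with C11 atomics.
 * The sketch below is purely illustrative and not part of this file or of
 * the liburing API; "struct cq_view" is a hypothetical stand-in for the
 * mapped CQ ring fields. An acquire load of the tail plays the role of
 * read_barrier(), and a release store of the head is what hands a consumed
 * slot back to the kernel.
 *
 *	#include <stdatomic.h>
 *
 *	struct cq_view {
 *		_Atomic unsigned *khead, *ktail;
 *		unsigned mask;
 *		struct io_uring_cqe *cqes;
 *	};
 *
 *	// Return the oldest unread CQE, or NULL if the ring is empty.
 *	static struct io_uring_cqe *cq_peek(struct cq_view *cq)
 *	{
 *		unsigned head = atomic_load_explicit(cq->khead, memory_order_relaxed);
 *		unsigned tail = atomic_load_explicit(cq->ktail, memory_order_acquire);
 *
 *		return head != tail ? &cq->cqes[head & cq->mask] : NULL;
 *	}
 *
 *	// Mark the oldest CQE as consumed; the release store orders the
 *	// CQE reads before the head update the kernel will observe.
 *	static void cq_advance(struct cq_view *cq)
 *	{
 *		unsigned head = atomic_load_explicit(cq->khead, memory_order_relaxed);
 *
 *		atomic_store_explicit(cq->khead, head + 1, memory_order_release);
 *	}
 */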

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 1);
}
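
/*
 * Example usage (illustrative sketch, not part of this file): wait for a
 * completion, inspect cqe->res, then bump the CQ head so the kernel may
 * reuse the slot. It assumes an already initialized "struct io_uring ring"
 * and <stdio.h> for the error messages. Advancing the head directly through
 * ring.cq.khead mirrors how this file manipulates the mapped ring; newer
 * liburing wraps the same step in a helper.
 *
 *	struct io_uring_cqe *cqe;
 *	int ret;
 *
 *	ret = io_uring_wait_cqe(&ring, &cqe);
 *	if (ret < 0) {
 *		fprintf(stderr, "wait_cqe: %s\n", strerror(-ret));
 *	} else if (cqe) {
 *		if (cqe->res < 0)
 *			fprintf(stderr, "IO failed: %s\n", strerror(-cqe->res));
 *		(*ring.cq.khead)++;	// mark the CQE as consumed
 *		write_barrier();	// make the head update visible
 *	}
 */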

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns the number of sqes submitted.
 */
int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted, to_submit;
	int ret;

	/*
	 * If we have pending IO in the kring, submit it first. We need a
	 * read barrier here to match the kernel's store barrier when updating
	 * the SQ head.
	 */
	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}

	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	while (to_submit--) {
		ktail_next++;
		read_barrier();

		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail = ktail_next;

		sq->sqe_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		/*
		 * First write barrier ensures that the SQE stores are updated
		 * with the tail update. This is needed so that the kernel
		 * will never see a tail update without the preceding SQE
		 * stores being done.
		 */
		write_barrier();
		*sq->ktail = ktail;
		/*
		 * The kernel has the matching read barrier for reading the
		 * SQ tail.
		 */
		write_barrier();
	}

submit:
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
			     IORING_ENTER_GETEVENTS, NULL);
	if (ret < 0)
		return -errno;

	return ret;
}
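
/*
 * Note: the publish step above (write_barrier() followed by the tail store)
 * is a release operation in C11 terms. The sketch below is illustrative only
 * and not part of this file or of the liburing API; "struct sq_view" is a
 * hypothetical stand-in for the mapped SQ ring fields. The array stores, and
 * the SQE stores before them, must be visible to the kernel no later than
 * the new tail value, hence the release store.
 *
 *	#include <stdatomic.h>
 *
 *	struct sq_view {
 *		_Atomic unsigned *ktail;
 *		unsigned mask;
 *		unsigned *array;
 *	};
 *
 *	// Publish one already-filled SQE, identified by its index in the
 *	// SQE array, to the kernel ring.
 *	static void sq_publish(struct sq_view *sq, unsigned sqe_idx)
 *	{
 *		unsigned tail = atomic_load_explicit(sq->ktail, memory_order_relaxed);
 *
 *		sq->array[tail & sq->mask] = sqe_idx;
 *		atomic_store_explicit(sq->ktail, tail + 1, memory_order_release);
 *	}
 */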

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned next = sq->sqe_tail + 1;
	struct io_uring_sqe *sqe;

	/*
	 * All sqes are used
	 */
	if (next - sq->sqe_head > *sq->kring_entries)
		return NULL;

	sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
	sq->sqe_tail = next;
	return sqe;
}
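
/*
 * End-to-end example (illustrative sketch, not part of this file): grab an
 * sqe, queue a no-op request, submit it, and reap the completion. This
 * assumes the companion setup helpers io_uring_queue_init() and
 * io_uring_queue_exit() from the same library, plus <stdio.h>, and fills
 * the sqe by hand rather than relying on any prep helper.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *
 *	if (io_uring_queue_init(8, &ring, 0) < 0)
 *		return 1;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	if (!sqe)
 *		return 1;		// SQ ring is full
 *
 *	memset(sqe, 0, sizeof(*sqe));
 *	sqe->opcode = IORING_OP_NOP;	// no-op request, completes immediately
 *	sqe->user_data = 42;		// tag to match against the completion
 *
 *	if (io_uring_submit(&ring) < 1)
 *		return 1;
 *
 *	if (io_uring_wait_cqe(&ring, &cqe) == 0 && cqe) {
 *		if (cqe->user_data != 42)
 *			fprintf(stderr, "unexpected completion\n");
 *		(*ring.cq.khead)++;	// mark the CQE as consumed
 *		write_barrier();	// make the head update visible
 *	}
 *	io_uring_queue_exit(&ring);
 */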