// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/prefetch.h>

#include "hyperv_vmbus.h"

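/*
 * Every packet placed in the ring buffer is followed by an 8-byte
 * trailer holding the ring indices at the time of the write (the
 * prev_indices value appended in hv_ringbuffer_write()); the packet
 * iterator skips over it in __hv_pkt_iter_next().
 */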
#define VMBUS_PKT_TRAILER 8

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled. Here are the details of this protocol:
 *
 *	1. The host guarantees that while it is draining the
 *	   ring buffer, it will set the interrupt_mask to
 *	   indicate it does not need to be interrupted when
 *	   new data is placed.
 *
 *	2. The host guarantees that it will completely drain
 *	   the ring buffer before exiting the read loop. Further,
 *	   once the ring buffer is empty, it will clear the
 *	   interrupt_mask and re-check to see if new data has
 *	   arrived.
 *
 * KYS: Oct. 30, 2016:
 * It looks like Windows hosts have logic to deal with DoS attacks that
 * can be triggered if the host receives interrupts when it is not
 * expecting them. The host expects interrupts only when the ring
 * transitions from empty to non-empty (or full to non-full on the
 * guest-to-host ring).
 * So, base the signaling decision solely on the ring state until the
 * host logic is fixed.
 */

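/*
 * old_write is the write_index value before the caller copied its
 * packet in. If it equals the host's read_index, the ring was empty
 * when the write began, so this write is an empty -> non-empty
 * transition and the host is signaled, unless it has masked
 * interrupts via interrupt_mask.
 */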
static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;

	/*
	 * Ensure the caller's update of write_index is visible before we
	 * read interrupt_mask; ordering a store before a later load
	 * requires a full memory barrier.
	 */
	virt_mb();
	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
		return;

	/* check interrupt_mask before read_index */
	virt_rmb();
	/*
	 * This is the only case we need to signal: when the
	 * ring transitions from being empty to non-empty.
	 */
	if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) {
		++channel->intr_out_empty;
		vmbus_setevent(channel);
	}
}

/* Get the next write location for the specified ring buffer. */
static inline u32
hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
{
	u32 next = ring_info->ring_buffer->write_index;

	return next;
}

/* Set the next write location for the specified ring buffer. */
static inline void
hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
			   u32 next_write_location)
{
	ring_info->ring_buffer->write_index = next_write_location;
}

/* Set the next read location for the specified ring buffer. */
static inline void
hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
			  u32 next_read_location)
{
	ring_info->ring_buffer->read_index = next_read_location;
	ring_info->priv_read_index = next_read_location;
}

/* Get the size of the ring buffer. */
static inline u32
hv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info)
{
	return ring_info->ring_datasize;
}

/*
 * Get the ring buffer indices packed as a u64: the write index is in
 * the upper 32 bits; the lower 32 bits (reserved for the read index)
 * are left as zero.
 */
static inline u64
hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
{
	return (u64)ring_info->ring_buffer->write_index << 32;
}

/*
 * Helper routine to copy from source to ring buffer.
 * Assume there is enough room. Handles wrap-around in dest case only!!
 */
static u32 hv_copyto_ringbuffer(
	struct hv_ring_buffer_info *ring_info,
	u32 start_write_offset,
	const void *src,
	u32 srclen)
{
	void *ring_buffer = hv_get_ring_buffer(ring_info);
	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);

	memcpy(ring_buffer + start_write_offset, src, srclen);

	start_write_offset += srclen;
	if (start_write_offset >= ring_buffer_size)
		start_write_offset -= ring_buffer_size;

	return start_write_offset;
}

/*
 *
 * hv_get_ringbuffer_availbytes()
 *
 * Get the number of bytes available to read from and to write to the
 * specified ring buffer. Used only for the debug statistics reported
 * by hv_ringbuffer_get_debuginfo().
 */
static void
hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
			     u32 *read, u32 *write)
{
	u32 read_loc, write_loc, dsize;

	/* Capture the read/write indices before they change */
	read_loc = READ_ONCE(rbi->ring_buffer->read_index);
	write_loc = READ_ONCE(rbi->ring_buffer->write_index);
	dsize = rbi->ring_datasize;

	*write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
		read_loc - write_loc;
	*read = dsize - *write;
}

/* Get various debug metrics for the specified ring buffer. */
int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
				struct hv_ring_buffer_debug_info *debug_info)
{
	u32 bytes_avail_towrite;
	u32 bytes_avail_toread;

	mutex_lock(&ring_info->ring_buffer_mutex);

	if (!ring_info->ring_buffer) {
		mutex_unlock(&ring_info->ring_buffer_mutex);
		return -EINVAL;
	}

	hv_get_ringbuffer_availbytes(ring_info,
				     &bytes_avail_toread,
				     &bytes_avail_towrite);
	debug_info->bytes_avail_toread = bytes_avail_toread;
	debug_info->bytes_avail_towrite = bytes_avail_towrite;
	debug_info->current_read_index = ring_info->ring_buffer->read_index;
	debug_info->current_write_index = ring_info->ring_buffer->write_index;
	debug_info->current_interrupt_mask
		= ring_info->ring_buffer->interrupt_mask;
	mutex_unlock(&ring_info->ring_buffer_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);

/* Initialize a channel's ring buffer info mutex locks */
void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
{
	mutex_init(&channel->inbound.ring_buffer_mutex);
	mutex_init(&channel->outbound.ring_buffer_mutex);
}

/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
		       struct page *pages, u32 page_cnt)
{
	int i;
	struct page **pages_wraparound;

	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));

	/*
	 * First page holds struct hv_ring_buffer, do wraparound mapping for
	 * the rest: the data pages are mapped twice back to back so that a
	 * packet which wraps past the end of the ring can still be copied
	 * with a single linear memcpy.
	 */
	pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
				   GFP_KERNEL);
	if (!pages_wraparound)
		return -ENOMEM;

	pages_wraparound[0] = pages;
	for (i = 0; i < 2 * (page_cnt - 1); i++)
		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];

	ring_info->ring_buffer = (struct hv_ring_buffer *)
		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);

	kfree(pages_wraparound);

	if (!ring_info->ring_buffer)
		return -ENOMEM;

	ring_info->ring_buffer->read_index =
		ring_info->ring_buffer->write_index = 0;

	/* Set the feature bit for enabling flow control. */
	ring_info->ring_buffer->feature_bits.value = 1;

	ring_info->ring_size = page_cnt << PAGE_SHIFT;
	ring_info->ring_size_div10_reciprocal =
		reciprocal_value(ring_info->ring_size / 10);
	ring_info->ring_datasize = ring_info->ring_size -
		sizeof(struct hv_ring_buffer);
	ring_info->priv_read_index = 0;

	spin_lock_init(&ring_info->ring_lock);

	return 0;
}

/* Cleanup the ring buffer. */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
	mutex_lock(&ring_info->ring_buffer_mutex);
	vunmap(ring_info->ring_buffer);
	ring_info->ring_buffer = NULL;
	mutex_unlock(&ring_info->ring_buffer_mutex);
}

/* Write to the ring buffer. */
int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count)
{
	int i;
	u32 bytes_avail_towrite;
	u32 totalbytes_towrite = sizeof(u64);
	u32 next_write_location;
	u32 old_write;
	u64 prev_indices;
	unsigned long flags;
	struct hv_ring_buffer_info *outring_info = &channel->outbound;

	if (channel->rescind)
		return -ENODEV;

	for (i = 0; i < kv_count; i++)
		totalbytes_towrite += kv_list[i].iov_len;

	spin_lock_irqsave(&outring_info->ring_lock, flags);

	bytes_avail_towrite = hv_get_bytes_to_write(outring_info);

	/*
	 * If there is only just enough room for the packet, treat the ring
	 * buffer as full. Filling it exactly would make write_index equal
	 * read_index, which is indistinguishable from an empty ring buffer
	 * on the next pass.
	 */
	if (bytes_avail_towrite <= totalbytes_towrite) {
		++channel->out_full_total;

		if (!channel->out_full_flag) {
			++channel->out_full_first;
			channel->out_full_flag = true;
		}

		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	channel->out_full_flag = false;

	/* Write to the ring buffer */
	next_write_location = hv_get_next_write_location(outring_info);

	old_write = next_write_location;

	for (i = 0; i < kv_count; i++) {
		next_write_location = hv_copyto_ringbuffer(outring_info,
							   next_write_location,
							   kv_list[i].iov_base,
							   kv_list[i].iov_len);
	}

	/* Set previous packet start */
	prev_indices = hv_get_ring_bufferindices(outring_info);

	next_write_location = hv_copyto_ringbuffer(outring_info,
						   next_write_location,
						   &prev_indices,
						   sizeof(u64));

	/* Issue a full memory barrier before updating the write index */
	virt_mb();

	/* Now, update the write location */
	hv_set_next_write_location(outring_info, next_write_location);

	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	hv_signal_on_write(old_write, channel);

	if (channel->rescind)
		return -ENODEV;

	return 0;
}

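/*
 * Read the next packet from the ring buffer into the caller's buffer.
 *
 * Returns 0 with *buffer_actual_len == 0 if the ring is empty, or
 * -ENOBUFS (with *buffer_actual_len set to the required length) if the
 * caller's buffer is too small. When @raw is true the vmpacket
 * descriptor header is copied along with the payload; otherwise only
 * the payload past the header offset is copied.
 */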
int hv_ringbuffer_read(struct vmbus_channel *channel,
		       void *buffer, u32 buflen, u32 *buffer_actual_len,
		       u64 *requestid, bool raw)
{
	struct vmpacket_descriptor *desc;
	u32 packetlen, offset;

	if (unlikely(buflen == 0))
		return -EINVAL;

	*buffer_actual_len = 0;
	*requestid = 0;

	/* Make sure there is something to read */
	desc = hv_pkt_iter_first(channel);
	if (desc == NULL) {
		/*
		 * No error is returned when there is not even a header;
		 * drivers are supposed to check buffer_actual_len.
		 */
		return 0;
	}

	offset = raw ? 0 : (desc->offset8 << 3);
	packetlen = (desc->len8 << 3) - offset;
	*buffer_actual_len = packetlen;
	*requestid = desc->trans_id;

	if (unlikely(packetlen > buflen))
		return -ENOBUFS;

	/* since ring is double mapped, only one copy is necessary */
	memcpy(buffer, (const char *)desc + offset, packetlen);

	/* Advance ring index to next packet descriptor */
	__hv_pkt_iter_next(channel, desc);

	/* Notify host of update */
	hv_pkt_iter_close(channel);

	return 0;
}

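/*
 * Packet iterator: hv_pkt_iter_first() and __hv_pkt_iter_next() walk
 * the inbound ring using the private read index (priv_read_index)
 * without making the consumed space visible to the host. Only
 * hv_pkt_iter_close() copies priv_read_index into the shared
 * read_index and decides whether the host needs to be signaled.
 */
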
/*
 * Determine number of bytes available in ring buffer after
 * the current iterator (priv_read_index) location.
 *
 * This is similar to hv_get_bytes_to_read but with private
 * read index instead.
 */
static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
{
	u32 priv_read_loc = rbi->priv_read_index;
	u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index);

	if (write_loc >= priv_read_loc)
		return write_loc - priv_read_loc;
	else
		return (rbi->ring_datasize - priv_read_loc) + write_loc;
}

/*
 * Get first vmbus packet from ring buffer after read_index
 *
 * If ring buffer is empty, returns NULL and no other action needed.
 */
struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	struct vmpacket_descriptor *desc;

	hv_debug_delay_test(channel, MESSAGE_DELAY);
	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
		return NULL;

	desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
	if (desc)
		prefetch((char *)desc + (desc->len8 << 3));

	return desc;
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_first);

/*
 * Get next vmbus packet from ring buffer.
 *
 * Advances the current location (priv_read_index) and checks for more
 * data. If no more complete packets are available, returns NULL.
 */
struct vmpacket_descriptor *
__hv_pkt_iter_next(struct vmbus_channel *channel,
		   const struct vmpacket_descriptor *desc)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 packetlen = desc->len8 << 3;
	u32 dsize = rbi->ring_datasize;

	hv_debug_delay_test(channel, MESSAGE_DELAY);
	/* bump offset to next potential packet */
	rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
	if (rbi->priv_read_index >= dsize)
		rbi->priv_read_index -= dsize;

	/* more data? */
	return hv_pkt_iter_first(channel);
}
EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);

/* How many bytes were read in this iterator cycle */
static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,
				  u32 start_read_index)
{
	if (rbi->priv_read_index >= start_read_index)
		return rbi->priv_read_index - start_read_index;
	else
		return rbi->ring_datasize - start_read_index +
			rbi->priv_read_index;
}

/*
 * Update host ring buffer after iterating over packets. If the host has
 * stopped queuing new entries because it found the ring buffer full, and
 * sufficient space is being freed up, signal the host. But be careful to
 * only signal the host when necessary, both for performance reasons and
 * because Hyper-V protects itself by throttling guests that signal
 * inappropriately.
 *
 * Determining when to signal is tricky. There are three key data inputs
 * that must be handled in this order to avoid race conditions:
 *
 * 1. Update the read_index
 * 2. Read the pending_send_sz
 * 3. Read the current write_index
 *
 * The interrupt_mask is not used to determine when to signal. The
 * interrupt_mask is used only on the guest->host ring buffer when
 * sending requests to the host. The host does not use it on the host->
 * guest ring buffer to indicate whether it should be signaled.
 */
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 curr_write_sz, pending_sz, bytes_read, start_read_index;

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_rmb();
	start_read_index = rbi->ring_buffer->read_index;
	rbi->ring_buffer->read_index = rbi->priv_read_index;

	/*
	 * Older versions of Hyper-V (before WS2012 and Win8) do not
	 * implement pending_send_sz and simply poll if the host->guest
	 * ring buffer is full. No signaling is needed or expected.
	 */
	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
		return;

	/*
	 * Issue a full memory barrier before making the signaling decision.
	 * If reading pending_send_sz were to be reordered and happen
	 * before we commit the new read_index, a race could occur. If the
	 * host were to set the pending_send_sz after we have sampled
	 * pending_send_sz, and the ring buffer blocks before we commit the
	 * read index, we could miss sending the interrupt. Issue a full
	 * memory barrier to address this.
	 */
	virt_mb();

	/*
	 * If the pending_send_sz is zero, then the ring buffer is not
	 * blocked and there is no need to signal. This is by far the
	 * most common case, so exit quickly for best performance.
	 */
	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
	if (!pending_sz)
		return;

	/*
	 * Ensure the read of write_index in hv_get_bytes_to_write()
	 * happens after the read of pending_send_sz.
	 */
	virt_rmb();
	curr_write_sz = hv_get_bytes_to_write(rbi);
	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);

	/*
	 * We want to signal the host only if we're transitioning
	 * from a "not enough free space" state to an "enough free
	 * space" state. For example, it's possible that this function
	 * could run and free up enough space to signal the host, and then
	 * run again and free up additional space before the host has a
	 * chance to clear the pending_send_sz. The 2nd invocation would
	 * be a null transition from "enough free space" to "enough free
	 * space", which doesn't warrant a signal.
	 *
	 * Exactly filling the ring buffer is treated as "not enough
	 * space". The ring buffer must always have at least one byte
	 * empty so the empty and full conditions are distinguishable.
	 * hv_get_bytes_to_write() doesn't fully tell the truth in
	 * this regard.
	 *
	 * So first check if we were in the "enough free space" state
	 * before we began the iteration. If so, the host was not
	 * blocked, and there's no need to signal.
	 */
	if (curr_write_sz - bytes_read > pending_sz)
		return;

	/*
	 * Similarly, if the new state is "not enough space", then
	 * there's no need to signal.
	 */
	if (curr_write_sz <= pending_sz)
		return;

	++channel->intr_in_full;
	vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);