^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright (c) 2006 Oracle. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * This software is available to you under a choice of one of two
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * licenses. You may choose to be licensed under the terms of the GNU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * General Public License (GPL) Version 2, available from the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * COPYING in the main directory of this source tree, or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * OpenIB.org BSD license below:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Redistribution and use in source and binary forms, with or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * without modification, are permitted provided that the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * conditions are met:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * - Redistributions of source code must retain the above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * copyright notice, this list of conditions and the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * disclaimer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * - Redistributions in binary form must reproduce the above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * copyright notice, this list of conditions and the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * disclaimer in the documentation and/or other materials
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * provided with the distribution.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * SOFTWARE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include <linux/highmem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <linux/cpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include "rds.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) struct rds_page_remainder {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) struct page *r_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) unsigned long r_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) static
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * rds_page_remainder_alloc - build up regions of a message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * @scat: Scatter list for message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * @bytes: the number of bytes needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * @gfp: the waiting behaviour of the allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * kmap the pages, etc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * If @bytes is at least a full page then this just returns a page from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * alloc_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * If @bytes is a partial page then this stores the unused region of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * page in a per-cpu structure. Future partial-page allocations may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * satisfied from that cached region. This lets us waste less memory on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * small allocations with minimal complexity. It works because the transmit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * path passes read-only page regions down to devices. They hold a page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * reference until they are done with the region.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) gfp_t gfp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) struct rds_page_remainder *rem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) gfp |= __GFP_HIGHMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) /* jump straight to allocation if we're trying for a huge page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) if (bytes >= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) page = alloc_page(gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) sg_set_page(scat, page, PAGE_SIZE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) rem = &per_cpu(rds_page_remainders, get_cpu());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) /* avoid a tiny region getting stuck by tossing it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) rds_stats_inc(s_page_remainder_miss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) __free_page(rem->r_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) rem->r_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) /* hand out a fragment from the cached page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) get_page(sg_page(scat));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) if (rem->r_offset != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) rds_stats_inc(s_page_remainder_hit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) rem->r_offset += ALIGN(bytes, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) if (rem->r_offset >= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) __free_page(rem->r_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) rem->r_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) /* alloc if there is nothing for us to use */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) page = alloc_page(gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) rem = &per_cpu(rds_page_remainders, get_cpu());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* did someone race to fill the remainder before us? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) if (rem->r_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) __free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) /* otherwise install our page and loop around to alloc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) rem->r_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) rem->r_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) ret ? 0 : scat->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) void rds_page_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) unsigned int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) struct rds_page_remainder *rem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) rem = &per_cpu(rds_page_remainders, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) rdsdebug("cpu %u\n", cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) if (rem->r_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) __free_page(rem->r_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) rem->r_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) }