^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright(c) 2015 - 2020 Intel Corporation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * This file is provided under a dual BSD/GPLv2 license. When using or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * redistributing this file, you may do so under either license.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * GPL LICENSE SUMMARY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * This program is free software; you can redistribute it and/or modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * it under the terms of version 2 of the GNU General Public License as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * This program is distributed in the hope that it will be useful, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * WITHOUT ANY WARRANTY; without even the implied warranty of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * General Public License for more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * BSD LICENSE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * Redistribution and use in source and binary forms, with or without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * modification, are permitted provided that the following conditions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * are met:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * - Redistributions of source code must retain the above copyright
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * notice, this list of conditions and the following disclaimer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * - Redistributions in binary form must reproduce the above copyright
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * notice, this list of conditions and the following disclaimer in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * the documentation and/or other materials provided with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * distribution.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * - Neither the name of Intel Corporation nor the names of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * contributors may be used to endorse or promote products derived
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * from this software without specific prior written permission.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #include <linux/topology.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #include <linux/cpumask.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #include <linux/interrupt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #include <linux/numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #include "hfi.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #include "affinity.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #include "sdma.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #include "trace.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) struct hfi1_affinity_node_list node_affinity = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) .list = LIST_HEAD_INIT(node_affinity.list),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) .lock = __MUTEX_INITIALIZER(node_affinity.lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) /* Names of IRQ types, indexed by enum irq_type */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) static const char * const irq_type_names[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) "SDMA",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) "RCVCTXT",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) "NETDEVCTXT",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) "GENERAL",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) "OTHER",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) /* Per NUMA node count of HFI devices */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) static unsigned int *hfi1_per_node_cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) static inline void init_cpu_mask_set(struct cpu_mask_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) cpumask_clear(&set->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) cpumask_clear(&set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) set->gen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) /* Increment generation of CPU set if needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) if (cpumask_equal(&set->mask, &set->used)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) * We've used up all the CPUs, bump up the generation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) * and reset the 'used' map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) set->gen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) cpumask_clear(&set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) if (cpumask_empty(&set->used) && set->gen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) set->gen--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) cpumask_copy(&set->used, &set->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) /* Get the first CPU from the list of unused CPUs in a CPU set data structure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) if (!diff || !set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) _cpu_mask_set_gen_inc(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) /* Find the CPUs left unused in the CPU mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) cpumask_andnot(diff, &set->mask, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) cpu = cpumask_first(diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) if (cpu >= nr_cpu_ids) /* empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) cpu = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) cpumask_set_cpu(cpu, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) if (!set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) cpumask_clear_cpu(cpu, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) _cpu_mask_set_gen_dec(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) }
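
/*
 * Illustrative sketch (not part of the driver): how the helpers above hand
 * out CPUs round-robin.  The caller and mask names below are hypothetical.
 *
 *	cpumask_var_t diff;
 *	struct cpu_mask_set *set = &some_entry->def_intr;
 *	int a, b;
 *
 *	if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
 *		return -ENOMEM;
 *	a = cpu_mask_set_get_first(set, diff);	// first unused CPU, now marked used
 *	b = cpu_mask_set_get_first(set, diff);	// next unused CPU
 *	// once every CPU in 'mask' is in 'used', _cpu_mask_set_gen_inc()
 *	// bumps 'gen' and clears 'used', so allocation wraps around
 *	cpu_mask_set_put(set, a);		// release CPU 'a' again
 *	free_cpumask_var(diff);
 */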
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) /* Initialize the non-HT CPU cores mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) void init_real_cpu_mask(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) int possible, curr_cpu, i, ht;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) cpumask_clear(&node_affinity.real_cpu_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) /* Start with cpu online mask as the real cpu mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * Remove HT cores from the real cpu mask. Do this in two steps below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) possible = cpumask_weight(&node_affinity.real_cpu_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) ht = cpumask_weight(topology_sibling_cpumask(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) cpumask_first(&node_affinity.real_cpu_mask)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) * Step 1. Skip over the first N HT siblings and use them as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * "real" cores. Assumes that HT cores are not enumerated in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * succession (except in the single core case).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) for (i = 0; i < possible / ht; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * skip any gaps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) for (; i < possible; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) }
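
/*
 * Worked example (hypothetical topology): with 8 online CPUs and 2-way SMT
 * enumerated as 0-3 = physical cores and 4-7 = their HT siblings, possible
 * is 8 and ht is 2.  Step 1 walks past CPUs 0-3 (possible / ht iterations)
 * and step 2 clears CPUs 4-7, leaving real_cpu_mask = 0-3.
 */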
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) int node_affinity_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) struct pci_dev *dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) const struct pci_device_id *ids = hfi1_pci_tbl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) cpumask_clear(&node_affinity.proc.used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) node_affinity.proc.gen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) node_affinity.num_core_siblings =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) cpumask_weight(topology_sibling_cpumask(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) cpumask_first(&node_affinity.proc.mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) ));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) node_affinity.num_possible_nodes = num_possible_nodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) node_affinity.num_online_nodes = num_online_nodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) node_affinity.num_online_cpus = num_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * The real cpu mask is part of the affinity struct but it has to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * initialized early. It is needed to calculate the number of user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * contexts in set_up_context_variables().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) init_real_cpu_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) if (!hfi1_per_node_cntr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) while (ids->vendor) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) dev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) node = pcibus_to_node(dev->bus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) if (node < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) hfi1_per_node_cntr[node]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) ids++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * Invalid PCI NUMA node information found, note it, and populate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) * our database 1:1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) pr_err("HFI: System BIOS may need to be upgraded\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) for (node = 0; node < node_affinity.num_possible_nodes; node++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) hfi1_per_node_cntr[node] = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) }
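
/*
 * Worked example (hypothetical): with two HFI devices behind buses on NUMA
 * node 0 and one device on node 1, the PCI scan above yields
 * hfi1_per_node_cntr[0] == 2 and hfi1_per_node_cntr[1] == 1.
 */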
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) static void node_affinity_destroy(struct hfi1_affinity_node *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) free_percpu(entry->comp_vect_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) kfree(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) void node_affinity_destroy_all(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) struct list_head *pos, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) list_for_each_safe(pos, q, &node_affinity.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) entry = list_entry(pos, struct hfi1_affinity_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) list_del(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) node_affinity_destroy(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) kfree(hfi1_per_node_cntr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) static struct hfi1_affinity_node *node_affinity_allocate(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) entry = kzalloc(sizeof(*entry), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) if (!entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) entry->node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) entry->comp_vect_affinity = alloc_percpu(u16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) INIT_LIST_HEAD(&entry->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) return entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) * It appends an entry to the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) * It *must* be called with node_affinity.lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) list_add_tail(&entry->list, &node_affinity.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) /* It must be called with node_affinity.lock held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) static struct hfi1_affinity_node *node_affinity_lookup(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) struct list_head *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) list_for_each(pos, &node_affinity.list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) entry = list_entry(pos, struct hfi1_affinity_node, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) if (entry->node == node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) return entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) u16 __percpu *comp_vect_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) int curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) u16 cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) u16 prev_cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) int ret_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) if (!possible_cpumask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) ret_cpu = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) if (!comp_vect_affinity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) ret_cpu = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) ret_cpu = cpumask_first(possible_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) if (ret_cpu >= nr_cpu_ids) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) ret_cpu = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) for_each_cpu(curr_cpu, possible_cpumask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) if (cntr < prev_cntr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) ret_cpu = curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) prev_cntr = cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) return ret_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) u16 __percpu *comp_vect_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) int curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) int max_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) u16 cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) u16 prev_cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) if (!possible_cpumask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) if (!comp_vect_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) max_cpu = cpumask_first(possible_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) if (max_cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) for_each_cpu(curr_cpu, possible_cpumask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) if (cntr > prev_cntr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) max_cpu = curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) prev_cntr = cntr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) return max_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) }
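
/*
 * Worked example (hypothetical counters): the per-CPU values behave like
 * reference counts.  With {cpu2: 0, cpu4: 1, cpu6: 1}, per_cpu_affinity_get()
 * returns cpu2 and bumps it to 1; a later per_cpu_affinity_put_max()
 * decrements whichever CPU then holds the highest count (the first such CPU
 * on a tie).
 */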
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) * Non-interrupt CPUs are used first, then interrupt CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) * Two already allocated cpu masks must be passed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) struct hfi1_affinity_node *entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) cpumask_var_t non_intr_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) cpumask_var_t available_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) __must_hold(&node_affinity.lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) struct cpu_mask_set *set = dd->comp_vect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) lockdep_assert_held(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) if (!non_intr_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) if (!available_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) /* Available CPUs for pinning completion vectors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) _cpu_mask_set_gen_inc(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) cpumask_andnot(available_cpus, &set->mask, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) /* Available CPUs without SDMA engine interrupts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) cpumask_andnot(non_intr_cpus, available_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) &entry->def_intr.used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) /* If there are non-interrupt CPUs available, use them first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) if (!cpumask_empty(non_intr_cpus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) cpu = cpumask_first(non_intr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) else /* Otherwise, use interrupt CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) cpu = cpumask_first(available_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) if (cpu >= nr_cpu_ids) { /* empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) cpumask_set_cpu(cpu, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) struct cpu_mask_set *set = dd->comp_vect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) if (cpu < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) cpu_mask_set_put(set, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) /* _dev_comp_vect_mappings_destroy() is reentrant */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) int i, cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) if (!dd->comp_vect_mappings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) cpu = dd->comp_vect_mappings[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) _dev_comp_vect_cpu_put(dd, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) dd->comp_vect_mappings[i] = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) hfi1_cdbg(AFFINITY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) "[%s] Release CPU %d from completion vector %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) kfree(dd->comp_vect_mappings);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) dd->comp_vect_mappings = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) * This function creates the table for looking up CPUs for completion vectors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) * num_comp_vectors needs to have been initialized before calling this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) struct hfi1_affinity_node *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) __must_hold(&node_affinity.lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) int i, cpu, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) cpumask_var_t non_intr_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) cpumask_var_t available_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) lockdep_assert_held(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) free_cpumask_var(non_intr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) sizeof(*dd->comp_vect_mappings),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) if (!dd->comp_vect_mappings) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) for (i = 0; i < dd->comp_vect_possible_cpus; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) dd->comp_vect_mappings[i] = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) available_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) if (cpu < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) dd->comp_vect_mappings[i] = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) hfi1_cdbg(AFFINITY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) "[%s] Completion Vector %d -> CPU %d",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) free_cpumask_var(available_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) free_cpumask_var(non_intr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) free_cpumask_var(available_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) free_cpumask_var(non_intr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) _dev_comp_vect_mappings_destroy(dd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) }
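
/*
 * Worked example (hypothetical): with comp_vect_possible_cpus == 3 and CPUs
 * {3, 5, 7} reserved in dd->comp_vect->mask, the loop above produces
 * comp_vect_mappings = {3, 5, 7}, preferring CPUs that are not already used
 * for SDMA interrupts.
 */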
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) entry = node_affinity_lookup(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) if (!entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) ret = _dev_comp_vect_mappings_create(dd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) _dev_comp_vect_mappings_destroy(dd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) if (!dd->comp_vect_mappings)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) if (comp_vect >= dd->comp_vect_possible_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) return dd->comp_vect_mappings[comp_vect];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) * It assumes dd->comp_vect_possible_cpus is available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) struct hfi1_affinity_node *entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) bool first_dev_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) __must_hold(&node_affinity.lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) int i, j, curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) int possible_cpus_comp_vect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) lockdep_assert_held(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) * If there's only one CPU available for completion vectors, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) * there will only be one completion vector available. Otherwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) * the number of completion vectors available will be the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) * available CPUs divided by the number of devices in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) * local NUMA node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) if (cpumask_weight(&entry->comp_vect_mask) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) possible_cpus_comp_vect = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) dd_dev_warn(dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) possible_cpus_comp_vect +=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) cpumask_weight(&entry->comp_vect_mask) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) hfi1_per_node_cntr[dd->node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) * If the available completion vector CPUs don't divide
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) * evenly among the devices, then the first device to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) * initialized gets an extra CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) if (first_dev_init &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) cpumask_weight(&entry->comp_vect_mask) %
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) hfi1_per_node_cntr[dd->node] != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) possible_cpus_comp_vect++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) /* Reserve CPUs for the device's completion vectors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) entry->comp_vect_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) if (curr_cpu < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) hfi1_cdbg(AFFINITY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) "[%s] Completion vector affinity CPU set(s) %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) cpumask_pr_args(dev_comp_vect_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) for (j = 0; j < i; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) per_cpu_affinity_put_max(&entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) entry->comp_vect_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) return curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) }
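
/*
 * Worked example (hypothetical numbers): with 9 CPUs in entry->comp_vect_mask
 * and hfi1_per_node_cntr[dd->node] == 2 devices on the node, each device gets
 * 9 / 2 = 4 completion-vector CPUs, and the first device to be initialized
 * gets one extra (since 9 % 2 != 0), i.e. 5.
 */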
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) * It assumes dd->comp_vect_possible_cpus is available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) struct hfi1_affinity_node *entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) __must_hold(&node_affinity.lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) int i, cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) lockdep_assert_held(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) if (!dd->comp_vect_possible_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) entry->comp_vect_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) /* Clearing CPU in device completion vector cpu mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) if (cpu >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) dd->comp_vect_possible_cpus = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) * Interrupt affinity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) * Non-receive-available interrupts (SDMA and general) get a default mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) * that starts as the possible CPUs with HT siblings removed and with each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) * receive-available CPU removed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) * Receive-available interrupts are assigned starting at node-relative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) * CPU 1, wrapping back to node-relative CPU 1 as necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) const struct cpumask *local_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) int curr_cpu, possible, i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) bool new_entry = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) local_mask = cpumask_of_node(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) if (cpumask_first(local_mask) >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) local_mask = topology_core_cpumask(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) entry = node_affinity_lookup(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) * If this is the first time this NUMA node's affinity is used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) * create an entry in the global affinity structure and initialize it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) if (!entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) entry = node_affinity_allocate(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) if (!entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) dd_dev_err(dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) "Unable to allocate global affinity node\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) new_entry = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) init_cpu_mask_set(&entry->def_intr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) init_cpu_mask_set(&entry->rcv_intr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) cpumask_clear(&entry->comp_vect_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) cpumask_clear(&entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) /* Use the "real" cpu mask of this node as the default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) local_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) /* fill in the receive list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) possible = cpumask_weight(&entry->def_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) curr_cpu = cpumask_first(&entry->def_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) if (possible == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) /* only one CPU, everyone will use it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) * The general/control context will be the first CPU in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) * the default list, so it is removed from the default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) * list and added to the general interrupt list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) curr_cpu = cpumask_next(curr_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) &entry->def_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) * Remove the remaining kernel receive queues from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) * the default list and add them to the receive list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) for (i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) i < (dd->n_krcv_queues - 1) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) hfi1_per_node_cntr[dd->node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) cpumask_clear_cpu(curr_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) &entry->def_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) cpumask_set_cpu(curr_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) &entry->rcv_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) curr_cpu = cpumask_next(curr_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) &entry->def_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) if (curr_cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) * If there ends up being 0 CPU cores leftover for SDMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) * engines, use the same CPU cores as general/control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) * context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) if (cpumask_weight(&entry->def_intr.mask) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) cpumask_copy(&entry->def_intr.mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) &entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) /* Determine completion vector CPUs for the entire node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) cpumask_and(&entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) &node_affinity.real_cpu_mask, local_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) cpumask_andnot(&entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) &entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) &entry->rcv_intr.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) cpumask_andnot(&entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) &entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) &entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) * If there ends up being 0 CPU cores leftover for completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) * vectors, use the same CPU core as the general/control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) * context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) if (cpumask_weight(&entry->comp_vect_mask) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) cpumask_copy(&entry->comp_vect_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) &entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) if (new_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) node_affinity_add_tail(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) dd->affinity_entry = entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) if (new_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) node_affinity_destroy(entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) }
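
/*
 * Worked example (hypothetical): on a node whose "real" (non-HT) CPUs are 0-7,
 * with one device on the node and dd->n_krcv_queues == 3, CPU 0 becomes the
 * general/control interrupt CPU, CPUs 1-2 move to rcv_intr for the remaining
 * kernel receive queues, and CPUs 3-7 stay in def_intr for the SDMA engines.
 * comp_vect_mask is the node's real CPUs minus rcv_intr and the general CPU,
 * i.e. CPUs 3-7 as well.
 */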
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) if (!dd->affinity_entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) entry = node_affinity_lookup(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) if (!entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * Free device completion vector CPUs to be used by future
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) * completion vectors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) _dev_comp_vect_cpu_mask_clean_up(dd, entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) dd->affinity_entry = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) * This function updates the IRQ affinity hint for an MSI-X entry after it has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * been changed by the user via the /proc/irq interface. Only one CPU in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * mask is accepted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) struct sdma_engine *sde = msix->arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) struct hfi1_devdata *dd = sde->dd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) struct cpu_mask_set *set;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) int i, old_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) if (cpu > num_online_cpus() || cpu == sde->cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) entry = node_affinity_lookup(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) if (!entry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) old_cpu = sde->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) sde->cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) cpumask_clear(&msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) cpumask_set_cpu(cpu, &msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) msix->irq, irq_type_names[msix->type],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) sde->this_idx, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) irq_set_affinity_hint(msix->irq, &msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) * Record the new cpu in the hfi1_affinity_node and clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * the old cpu if it is not used by any other SDMA IRQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) set = &entry->def_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) cpumask_set_cpu(cpu, &set->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) cpumask_set_cpu(cpu, &set->used);
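/*
 * Check whether any other SDMA MSI-X vector still targets old_cpu;
 * if so, old_cpu must remain accounted in the default interrupt set.
 */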
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) for (i = 0; i < dd->msix_info.max_requested; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) struct hfi1_msix_entry *other_msix;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) other_msix = &dd->msix_info.msix_entries[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) if (other_msix->type != IRQ_SDMA || other_msix == msix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (cpumask_test_cpu(old_cpu, &other_msix->mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) cpumask_clear_cpu(old_cpu, &set->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) cpumask_clear_cpu(old_cpu, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
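/*
 * irq_affinity_notify callback, invoked when the affinity of an SDMA
 * MSI-X vector is changed (e.g. through /proc/irq). Only the first CPU
 * of the new mask is honored.
 */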
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) const cpumask_t *mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) int cpu = cpumask_first(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) struct hfi1_msix_entry *msix = container_of(notify,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) struct hfi1_msix_entry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) notify);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) /* Only one CPU configuration supported currently */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) hfi1_update_sdma_affinity(msix, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) static void hfi1_irq_notifier_release(struct kref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * This callback is required by the affinity notifier infrastructure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * There is nothing to free here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
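/*
 * Register an affinity change notifier on an SDMA MSI-X vector so that
 * sde->cpu and the per-node accounting stay in sync with changes made
 * by the user through /proc/irq.
 */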
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) struct irq_affinity_notify *notify = &msix->notify;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) notify->irq = msix->irq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) notify->notify = hfi1_irq_notifier_notify;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) notify->release = hfi1_irq_notifier_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) if (irq_set_affinity_notifier(notify->irq, notify))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) notify->irq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
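/* Unregister the affinity notifier installed by hfi1_setup_sdma_notifier() */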
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) struct irq_affinity_notify *notify = &msix->notify;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (irq_set_affinity_notifier(notify->irq, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) notify->irq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) * Function sets the irq affinity for msix.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * It *must* be called with node_affinity.lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) static int get_irq_affinity(struct hfi1_devdata *dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) struct hfi1_msix_entry *msix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) cpumask_var_t diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) struct cpu_mask_set *set = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) struct sdma_engine *sde = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) struct hfi1_ctxtdata *rcd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) char extra[64];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) int cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) extra[0] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) cpumask_clear(&msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) entry = node_affinity_lookup(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) switch (msix->type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) case IRQ_SDMA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) sde = (struct sdma_engine *)msix->arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) scnprintf(extra, 64, "engine %u", sde->this_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) set = &entry->def_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) case IRQ_GENERAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) cpu = cpumask_first(&entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) case IRQ_RCVCTXT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) rcd = (struct hfi1_ctxtdata *)msix->arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) if (rcd->ctxt == HFI1_CTRL_CTXT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) cpu = cpumask_first(&entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) set = &entry->rcv_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) case IRQ_NETDEVCTXT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) rcd = (struct hfi1_ctxtdata *)msix->arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) set = &entry->def_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) * The general and control contexts are placed on a particular
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) * CPU, which is set above. Skip accounting for it. Everything else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * finds its CPU here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) if (cpu == -1 && set) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) cpu = cpu_mask_set_get_first(set, diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) if (cpu < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) free_cpumask_var(diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) free_cpumask_var(diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) cpumask_set_cpu(cpu, &msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) msix->irq, irq_type_names[msix->type],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) extra, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) irq_set_affinity_hint(msix->irq, &msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) if (msix->type == IRQ_SDMA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) sde->cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) hfi1_setup_sdma_notifier(msix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
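/*
 * Locked wrapper around get_irq_affinity(); takes node_affinity.lock so
 * callers do not have to.
 */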
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) ret = get_irq_affinity(dd, msix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960)
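/*
 * Undo hfi1_get_irq_affinity(): return the vector's CPU to the
 * appropriate cpu_mask_set, drop the affinity hint and, for SDMA
 * vectors, remove the affinity notifier.
 */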
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) struct hfi1_msix_entry *msix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) struct cpu_mask_set *set = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) struct hfi1_ctxtdata *rcd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) mutex_lock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) entry = node_affinity_lookup(dd->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) switch (msix->type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) case IRQ_SDMA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) set = &entry->def_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) hfi1_cleanup_sdma_notifier(msix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) case IRQ_GENERAL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) /* Don't do accounting for general contexts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) case IRQ_RCVCTXT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) rcd = (struct hfi1_ctxtdata *)msix->arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) /* Don't do accounting for control contexts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) if (rcd->ctxt != HFI1_CTRL_CTXT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) set = &entry->rcv_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) case IRQ_NETDEVCTXT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) rcd = (struct hfi1_ctxtdata *)msix->arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) set = &entry->def_intr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (set) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) cpumask_andnot(&set->used, &set->used, &msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) _cpu_mask_set_gen_dec(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) irq_set_affinity_hint(msix->irq, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) cpumask_clear(&msix->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) mutex_unlock(&node_affinity.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /* This should be called with node_affinity.lock held */
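/*
 * Build a mask containing the hw_thread_no'th SMT sibling of each
 * physical core in the process cpumask: the first
 * num_cores_per_socket * num_online_nodes CPUs set in proc.mask are
 * kept, the rest are cleared, and the result is shifted left so it
 * lands on the requested HW thread.
 *
 * For example, on a hypothetical 2-socket system with 16 cores per
 * socket and 2 HW threads per core (64 online CPUs, enumerated with
 * all first threads before all second threads), hw_thread_no = 1
 * keeps CPUs 0-31 and shifts them by 16 * 2 * 1 = 32, selecting
 * CPUs 32-63.
 */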
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) struct hfi1_affinity_node_list *affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) int possible, curr_cpu, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) uint num_cores_per_socket = node_affinity.num_online_cpus /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) affinity->num_core_siblings /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) node_affinity.num_online_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) cpumask_copy(hw_thread_mask, &affinity->proc.mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) if (affinity->num_core_siblings > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) /* Remove the SMT siblings that are not needed for now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) possible = cpumask_weight(hw_thread_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) curr_cpu = cpumask_first(hw_thread_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) for (i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) i < num_cores_per_socket * node_affinity.num_online_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) for (; i < possible; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) cpumask_clear_cpu(curr_cpu, hw_thread_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) /* Identifying correct HW threads within physical cores */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) cpumask_shift_left(hw_thread_mask, hw_thread_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) num_cores_per_socket *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) node_affinity.num_online_nodes *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) hw_thread_no);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
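/*
 * Recommend a CPU for the current user process opening a context on a
 * device attached to NUMA node 'node'. The selected CPU is marked as
 * used in the proc mask set and should later be returned with
 * hfi1_put_proc_affinity(). Returns -1 if the process already has a
 * multi-CPU affinity of its own or if no suitable CPU was found.
 */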
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) int hfi1_get_proc_affinity(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) int cpu = -1, ret, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) struct hfi1_affinity_node *entry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) const struct cpumask *node_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) *proc_mask = current->cpus_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) struct hfi1_affinity_node_list *affinity = &node_affinity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) struct cpu_mask_set *set = &affinity->proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * check whether process/context affinity has already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * been set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) if (current->nr_cpus_allowed == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) current->pid, current->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) cpumask_pr_args(proc_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) * Mark the pre-set CPU as used. cpumask_set_cpu() is atomic, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) * we don't need the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) cpu = cpumask_first(proc_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) cpumask_set_cpu(cpu, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) current->pid, current->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) cpumask_pr_args(proc_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * The process does not have a preset CPU affinity so find one to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) * recommend using the following algorithm:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * For each user process that is opening a context on HFI Y:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * a) If all cores are filled, reinitialize the bitmask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * b) Fill real cores first, then HT cores (first set of HT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * cores on all physical cores, then the second set of HT cores,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) * and so on) in the following order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) * 1. Same NUMA node as HFI Y and not running an IRQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) * 2. Same NUMA node as HFI Y and running an IRQ handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * 3. Different NUMA node to HFI Y and not running an IRQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * 4. Different NUMA node to HFI Y and running an IRQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * c) Mark core as filled in the bitmask. As user processes are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * done, clear cores from the bitmask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) goto free_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) goto free_hw_thread_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) goto free_available_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) mutex_lock(&affinity->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) * If we've used all available HW threads, clear the mask and start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * overloading.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) _cpu_mask_set_gen_inc(set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * If the device's NUMA node has CPUs used by interrupt handlers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * include them in the interrupt handler mask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) entry = node_affinity_lookup(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (entry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) cpumask_copy(intrs_mask, (entry->def_intr.gen ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) &entry->def_intr.mask :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) &entry->def_intr.used));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) &entry->rcv_intr.mask :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) &entry->rcv_intr.used));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) cpumask_pr_args(intrs_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) cpumask_copy(hw_thread_mask, &set->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) * If HT cores are enabled, identify which HW threads within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * physical cores should be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) if (affinity->num_core_siblings > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) for (i = 0; i < affinity->num_core_siblings; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) find_hw_thread_mask(i, hw_thread_mask, affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * If there's at least one available core for this HW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) * thread number, stop looking for a core.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) * diff is guaranteed to be non-empty for at least one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) * iteration of this loop because the used mask is reset when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) * (set->mask == set->used) before this loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) cpumask_andnot(diff, hw_thread_mask, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) if (!cpumask_empty(diff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) cpumask_pr_args(hw_thread_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) node_mask = cpumask_of_node(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) cpumask_pr_args(node_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) /* Get cpumask of available CPUs on preferred NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) cpumask_and(available_mask, hw_thread_mask, node_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) cpumask_andnot(available_mask, available_mask, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) cpumask_pr_args(available_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) * Prefer not to place processes on the same CPUs as interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) * handlers; CPUs running interrupt handlers are used only as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) * a fallback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) * 1) If diff is not empty, then there are CPUs not running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) * interrupt handlers available, so diff gets copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) * over to available_mask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * 2) If diff is empty, then all CPUs not running interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) * handlers are taken, so available_mask contains all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) * available CPUs running interrupt handlers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) * 3) If available_mask is empty, then all CPUs on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) * preferred NUMA node are taken, so other NUMA nodes are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) * used for process assignments using the same method as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) * the preferred NUMA node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) cpumask_andnot(diff, available_mask, intrs_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) if (!cpumask_empty(diff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) cpumask_copy(available_mask, diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) /* If we don't have CPUs on the preferred node, use other NUMA nodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (cpumask_empty(available_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) cpumask_andnot(available_mask, hw_thread_mask, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) /* Excluding preferred NUMA cores */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) cpumask_andnot(available_mask, available_mask, node_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) hfi1_cdbg(PROC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) cpumask_pr_args(available_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * At first, we don't want to place processes on the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * CPUs as interrupt handlers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) cpumask_andnot(diff, available_mask, intrs_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) if (!cpumask_empty(diff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) cpumask_copy(available_mask, diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) cpumask_pr_args(available_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) cpu = cpumask_first(available_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) if (cpu >= nr_cpu_ids) /* empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) cpumask_set_cpu(cpu, &set->used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) mutex_unlock(&affinity->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) free_cpumask_var(intrs_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) free_available_mask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) free_cpumask_var(available_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) free_hw_thread_mask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) free_cpumask_var(hw_thread_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) free_diff:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) free_cpumask_var(diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
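/*
 * Return a CPU previously handed out by hfi1_get_proc_affinity() to the
 * proc mask set. Negative cpu values are ignored.
 */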
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) void hfi1_put_proc_affinity(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) struct hfi1_affinity_node_list *affinity = &node_affinity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) struct cpu_mask_set *set = &affinity->proc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) if (cpu < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) mutex_lock(&affinity->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) cpu_mask_set_put(set, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) mutex_unlock(&affinity->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) }