// SPDX-License-Identifier: GPL-2.0-only
/* Common code for 32 and 64-bit NUMA */
#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/topology.h>

#include <asm/e820/api.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/amd_nb.h>

#include "numa_internal.h"

int numa_off;
nodemask_t numa_nodes_parsed __initdata;

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);

static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;

static int numa_distance_cnt;
static u8 *numa_distance;

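/*
 * Parse the early "numa=" kernel parameter.  The options recognized below
 * are, for example:
 *
 *   numa=off       - disable NUMA and fall back to a single fake node
 *   numa=fake=...  - hand the argument to the NUMA emulation code
 *   numa=noacpi    - do not parse the ACPI SRAT table
 *   numa=nohmat    - do not parse the ACPI HMAT table
 */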
static __init int numa_setup(char *opt)
{
	if (!opt)
		return -EINVAL;
	if (!strncmp(opt, "off", 3))
		numa_off = 1;
	if (!strncmp(opt, "fake=", 5))
		return numa_emu_cmdline(opt + 5);
	if (!strncmp(opt, "noacpi", 6))
		disable_srat();
	if (!strncmp(opt, "nohmat", 6))
		disable_hmat();
	return 0;
}
early_param("numa", numa_setup);

/*
 * apicid, cpu, node mappings
 */
s16 __apicid_to_node[MAX_LOCAL_APIC] = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};

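/*
 * Map @cpu to its NUMA node via the CPU's APIC ID; returns NUMA_NO_NODE if
 * the APIC ID or its node is not known yet.
 */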
int numa_cpu_node(int cpu)
{
	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);

	if (apicid != BAD_APICID)
		return __apicid_to_node[apicid];
	return NUMA_NO_NODE;
}

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

/*
 * Map cpu index to node index
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

void numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	/* early setting, no percpu area yet */
	if (cpu_to_node_map) {
		cpu_to_node_map[cpu] = node;
		return;
	}

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
		dump_stack();
		return;
	}
#endif
	per_cpu(x86_cpu_to_node_map, cpu) = node;

	set_cpu_numa_node(cpu, node);
}

void numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on the number of available nodes.
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
void __init setup_node_to_cpumask_map(void)
{
	unsigned int node;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES)
		setup_nr_node_ids();

	/* allocate the map */
	for (node = 0; node < nr_node_ids; node++)
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}

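/*
 * Append a [@start, @end) block for node @nid to @mi, after basic sanity
 * checks: zero-length and malformed blocks are ignored, and the meminfo
 * array can hold at most NR_NODE_MEMBLKS entries.
 */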
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
				     struct numa_meminfo *mi)
{
	/* ignore zero length blks */
	if (start == end)
		return 0;

	/* whine about and ignore invalid blks */
	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
		pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
			nid, start, end - 1);
		return 0;
	}

	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
		pr_err("too many memblk ranges\n");
		return -EINVAL;
	}

	mi->blk[mi->nr_blks].start = start;
	mi->blk[mi->nr_blks].end = end;
	mi->blk[mi->nr_blks].nid = nid;
	mi->nr_blks++;
	return 0;
}

/**
 * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
 * @idx: Index of memblk to remove
 * @mi: numa_meminfo to remove memblk from
 *
 * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
 * decrementing @mi->nr_blks.
 */
void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
{
	mi->nr_blks--;
	memmove(&mi->blk[idx], &mi->blk[idx + 1],
		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
}

/**
 * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
 * @dst: numa_meminfo to append block to
 * @idx: Index of memblk to remove
 * @src: numa_meminfo to remove memblk from
 */
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
					 struct numa_meminfo *src)
{
	dst->blk[dst->nr_blks++] = src->blk[idx];
	numa_remove_memblk_from(idx, src);
}

/**
 * numa_add_memblk - Add one numa_memblk to numa_meminfo
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * Add a new memblk to the default numa_meminfo.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
}

/* Allocate NODE_DATA for a node on the local memory */
static void __init alloc_node_data(int nid)
{
	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
	u64 nd_pa;
	void *nd;
	int tnid;

	/*
	 * Allocate node data. Try node-local memory and then any node.
	 * Never allocate in DMA zone.
	 */
	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
	if (!nd_pa) {
		pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
		       nd_size, nid);
		return;
	}
	nd = __va(nd_pa);

	/* report and initialize */
	printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
	       nd_pa, nd_pa + nd_size - 1);
	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
	if (tnid != nid)
		printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);

	node_data[nid] = nd;
	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));

	node_set_online(nid);
}

/**
 * numa_cleanup_meminfo - Cleanup a numa_meminfo
 * @mi: numa_meminfo to clean up
 *
 * Sanitize @mi by merging and removing unnecessary memblks. Also check for
 * conflicts and clear unused memblks.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
{
	const u64 low = 0;
	const u64 high = PFN_PHYS(max_pfn);
	int i, j, k;

	/* first, trim all entries */
	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *bi = &mi->blk[i];

		/* move / save reserved memory ranges */
		if (!memblock_overlaps_region(&memblock.memory,
					      bi->start, bi->end - bi->start)) {
			numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
			continue;
		}

		/* make sure all non-reserved blocks are inside the limits */
		bi->start = max(bi->start, low);

		/* preserve info for non-RAM areas above 'max_pfn': */
		if (bi->end > high) {
			numa_add_memblk_to(bi->nid, high, bi->end,
					   &numa_reserved_meminfo);
			bi->end = high;
		}

		/* and there's no empty block */
		if (bi->start >= bi->end)
			numa_remove_memblk_from(i--, mi);
	}

	/* merge neighboring / overlapping entries */
	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *bi = &mi->blk[i];

		for (j = i + 1; j < mi->nr_blks; j++) {
			struct numa_memblk *bj = &mi->blk[j];
			u64 start, end;

			/*
			 * See whether there are overlapping blocks. Whine
			 * about but allow overlaps of the same nid. They
			 * will be merged below.
			 */
			if (bi->end > bj->start && bi->start < bj->end) {
				if (bi->nid != bj->nid) {
					pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
					       bi->nid, bi->start, bi->end - 1,
					       bj->nid, bj->start, bj->end - 1);
					return -EINVAL;
				}
				pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
					bi->nid, bi->start, bi->end - 1,
					bj->start, bj->end - 1);
			}

			/*
			 * Join together blocks on the same node if the
			 * hole between them doesn't overlap with memory
			 * on other nodes.
			 */
			if (bi->nid != bj->nid)
				continue;
			start = min(bi->start, bj->start);
			end = max(bi->end, bj->end);
			for (k = 0; k < mi->nr_blks; k++) {
				struct numa_memblk *bk = &mi->blk[k];

				if (bi->nid == bk->nid)
					continue;
				if (start < bk->end && end > bk->start)
					break;
			}
			if (k < mi->nr_blks)
				continue;
			printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
			       bi->nid, bi->start, bi->end - 1, bj->start,
			       bj->end - 1, start, end - 1);
			bi->start = start;
			bi->end = end;
			numa_remove_memblk_from(j--, mi);
		}
	}

	/* clear unused ones */
	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
		mi->blk[i].start = mi->blk[i].end = 0;
		mi->blk[i].nid = NUMA_NO_NODE;
	}

	return 0;
}

/*
 * Set nodes, which have memory in @mi, in *@nodemask.
 */
static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
					      const struct numa_meminfo *mi)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
		if (mi->blk[i].start != mi->blk[i].end &&
		    mi->blk[i].nid != NUMA_NO_NODE)
			node_set(mi->blk[i].nid, *nodemask);
}

/**
 * numa_reset_distance - Reset NUMA distance table
 *
 * The current table is freed. The next numa_set_distance() call will
 * create a new one.
 */
void __init numa_reset_distance(void)
{
	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);

	/* numa_distance could be 1LU marking allocation failure, test cnt */
	if (numa_distance_cnt)
		memblock_free(__pa(numa_distance), size);
	numa_distance_cnt = 0;
	numa_distance = NULL;	/* enable table creation */
}

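/*
 * Allocate the distance table.  It is sized to cover the highest node ID
 * seen so far (parsed nodes plus nodes with memory in numa_meminfo) and is
 * a flat cnt x cnt byte array indexed as [from * cnt + to], pre-filled with
 * LOCAL_DISTANCE on the diagonal and REMOTE_DISTANCE everywhere else.
 */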
static int __init numa_alloc_distance(void)
{
	nodemask_t nodes_parsed;
	size_t size;
	int i, j, cnt = 0;
	u64 phys;

	/* size the new table and allocate it */
	nodes_parsed = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);

	for_each_node_mask(i, nodes_parsed)
		cnt = i;
	cnt++;
	size = cnt * cnt * sizeof(numa_distance[0]);

	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
				      size, PAGE_SIZE);
	if (!phys) {
		pr_warn("Warning: can't allocate distance table!\n");
		/* don't retry until explicitly reset */
		numa_distance = (void *)1LU;
		return -ENOMEM;
	}
	memblock_reserve(phys, size);

	numa_distance = __va(phys);
	numa_distance_cnt = cnt;

	/* fill with the default distances */
	for (i = 0; i < cnt; i++)
		for (j = 0; j < cnt; j++)
			numa_distance[i * cnt + j] = i == j ?
				LOCAL_DISTANCE : REMOTE_DISTANCE;
	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);

	return 0;
}

/**
 * numa_set_distance - Set the NUMA distance from one node to another
 * @from: the 'from' node to set distance
 * @to: the 'to' node to set distance
 * @distance: NUMA distance
 *
 * Set the distance from node @from to @to to @distance. If the distance
 * table doesn't exist, one large enough to accommodate all the currently
 * known nodes will be created.
 *
 * If such a table cannot be allocated, a warning is printed and further
 * calls are ignored until the distance table is reset with
 * numa_reset_distance().
 *
 * If @from or @to is higher than the highest known node or lower than zero
 * at the time of table creation, or if @distance doesn't make sense, the
 * call is ignored.  This allows specific NUMA config implementations to
 * stay simple.
 */
void __init numa_set_distance(int from, int to, int distance)
{
	if (!numa_distance && numa_alloc_distance() < 0)
		return;

	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
	    from < 0 || to < 0) {
		pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	if ((u8)distance != distance ||
	    (from == to && distance != LOCAL_DISTANCE)) {
		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	numa_distance[from * numa_distance_cnt + to] = distance;
}

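/*
 * Return the distance between two nodes from the table; nodes outside the
 * allocated table get the LOCAL_DISTANCE/REMOTE_DISTANCE defaults.
 */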
int __node_distance(int from, int to)
{
	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
	return numa_distance[from * numa_distance_cnt + to];
}
EXPORT_SYMBOL(__node_distance);

/*
 * Sanity check to catch more bad NUMA configurations (they are amazingly
 * common). Make sure the nodes cover all memory.
 */
static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
{
	u64 numaram, e820ram;
	int i;

	numaram = 0;
	for (i = 0; i < mi->nr_blks; i++) {
		u64 s = mi->blk[i].start >> PAGE_SHIFT;
		u64 e = mi->blk[i].end >> PAGE_SHIFT;
		numaram += e - s;
		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
		if ((s64)numaram < 0)
			numaram = 0;
	}

	e820ram = max_pfn - absent_pages_in_range(0, max_pfn);

	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
	if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
		printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
		       (numaram << PAGE_SHIFT) >> 20,
		       (e820ram << PAGE_SHIFT) >> 20);
		return false;
	}
	return true;
}

/*
 * Mark all currently memblock-reserved physical memory (which covers the
 * kernel's own memory ranges) as hot-unpluggable.
 */
static void __init numa_clear_kernel_node_hotplug(void)
{
	nodemask_t reserved_nodemask = NODE_MASK_NONE;
	struct memblock_region *mb_region;
	int i;

	/*
	 * We have to do some preprocessing of memblock regions, to
	 * make them suitable for reservation.
	 *
	 * At this time, all memory regions reserved by memblock are
	 * used by the kernel, but those regions are not split up
	 * along node boundaries yet, and don't necessarily have their
	 * node ID set yet either.
	 *
	 * So iterate over all memory known to the x86 architecture,
	 * and use those ranges to set the nid in memblock.reserved.
	 * This will split up the memblock regions along node
	 * boundaries and will set the node IDs as well.
	 */
	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = numa_meminfo.blk + i;
		int ret;

		ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
		WARN_ON_ONCE(ret);
	}

	/*
	 * Now go over all reserved memblock regions, to construct a
	 * node mask of all kernel reserved memory areas.
	 *
	 * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
	 *   numa_meminfo might not include all memblock.reserved
	 *   memory ranges, because quirks such as trim_snb_memory()
	 *   reserve specific pages for Sandy Bridge graphics. ]
	 */
	for_each_reserved_mem_region(mb_region) {
		int nid = memblock_get_region_node(mb_region);

		if (nid != MAX_NUMNODES)
			node_set(nid, reserved_nodemask);
	}

	/*
	 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
	 * belonging to the reserved node mask.
	 *
	 * Note that this will include memory regions that reside
	 * on nodes that contain kernel memory - entire nodes
	 * become hot-unpluggable:
	 */
	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = numa_meminfo.blk + i;

		if (!node_isset(mb->nid, reserved_nodemask))
			continue;

		memblock_clear_hotplug(mb->start, mb->end - mb->start);
	}
}

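/*
 * Register the parsed memory layout with the core memory management code.
 * Roughly: derive node_possible_map, push node IDs into memblock.memory,
 * make nodes that host kernel memory non-hotpluggable, sanity-check that
 * the NUMA layout covers all of RAM, and finally allocate NODE_DATA for
 * every node with at least NODE_MIN_SIZE of memory.
 */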
static int __init numa_register_memblks(struct numa_meminfo *mi)
{
	int i, nid;

	/* Account for nodes with cpus and no memory */
	node_possible_map = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&node_possible_map, mi);
	if (WARN_ON(nodes_empty(node_possible_map)))
		return -EINVAL;

	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *mb = &mi->blk[i];
		memblock_set_node(mb->start, mb->end - mb->start,
				  &memblock.memory, mb->nid);
	}

	/*
	 * Very early during boot the kernel has to use some memory, e.g.
	 * to load the kernel image.  We cannot prevent this anyway, so any
	 * node the kernel resides in should be un-hotpluggable.
	 *
	 * And when we come here, allocating the node data won't fail.
	 */
	numa_clear_kernel_node_hotplug();

	/*
	 * If the sections array is going to be used for the pfn -> nid
	 * mapping, check whether its granularity is fine enough.
	 */
	if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
		unsigned long pfn_align = node_map_pfn_alignment();

		if (pfn_align && pfn_align < PAGES_PER_SECTION) {
			pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
				PFN_PHYS(pfn_align) >> 20,
				PFN_PHYS(PAGES_PER_SECTION) >> 20);
			return -EINVAL;
		}
	}
	if (!numa_meminfo_cover_memory(mi))
		return -EINVAL;

	/* Finally register nodes. */
	for_each_node_mask(nid, node_possible_map) {
		u64 start = PFN_PHYS(max_pfn);
		u64 end = 0;

		for (i = 0; i < mi->nr_blks; i++) {
			if (nid != mi->blk[i].nid)
				continue;
			start = min(mi->blk[i].start, start);
			end = max(mi->blk[i].end, end);
		}

		if (start >= end)
			continue;

		/*
		 * Don't confuse VM with a node that doesn't have the
		 * minimum amount of memory:
		 */
		if (end && (end - start) < NODE_MIN_SIZE)
			continue;

		alloc_node_data(nid);
	}

	/* Dump memblock with node info and return. */
	memblock_dump_all();
	return 0;
}

/*
 * There are unfortunately some poorly designed mainboards around that
 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
 * mapping. To avoid this, fill in the mapping for all possible CPUs,
 * as the number of CPUs is not known yet. We round-robin over the
 * existing online nodes.
 */
static void __init numa_init_array(void)
{
	int rr, i;

	rr = first_node(node_online_map);
	for (i = 0; i < nr_cpu_ids; i++) {
		if (early_cpu_to_node(i) != NUMA_NO_NODE)
			continue;
		numa_set_node(i, rr);
		rr = next_node_in(rr, node_online_map);
	}
}

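/*
 * Common NUMA init path.  In short: reset all NUMA state, let @init_func
 * parse the platform configuration, clean up the resulting meminfo, apply
 * NUMA emulation if it was requested on the command line, register the
 * memblks, and finally drop any cpu->node mappings that point at offline
 * nodes before round-robining the remaining CPUs over the online nodes.
 */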
static int __init numa_init(int (*init_func)(void))
{
	int i;
	int ret;

	for (i = 0; i < MAX_LOCAL_APIC; i++)
		set_apicid_to_node(i, NUMA_NO_NODE);

	nodes_clear(numa_nodes_parsed);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);
	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
	WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
				  MAX_NUMNODES));
	WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
				  MAX_NUMNODES));
	/* In case that parsing SRAT failed. */
	WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
	numa_reset_distance();

	ret = init_func();
	if (ret < 0)
		return ret;

	/*
	 * We reset memblock back to the top-down direction
	 * here because if we configured ACPI_NUMA, we have
	 * parsed SRAT in init_func(). It is OK to have the
	 * reset here even if we didn't configure ACPI_NUMA
	 * or the ACPI NUMA init fails and falls back to the
	 * dummy NUMA init.
	 */
	memblock_set_bottom_up(false);

	ret = numa_cleanup_meminfo(&numa_meminfo);
	if (ret < 0)
		return ret;

	numa_emulation(&numa_meminfo, numa_distance_cnt);

	ret = numa_register_memblks(&numa_meminfo);
	if (ret < 0)
		return ret;

	for (i = 0; i < nr_cpu_ids; i++) {
		int nid = early_cpu_to_node(i);

		if (nid == NUMA_NO_NODE)
			continue;
		if (!node_online(nid))
			numa_clear_node(i);
	}
	numa_init_array();

	return 0;
}

/**
 * dummy_numa_init - Fallback dummy NUMA init
 *
 * Used if there's no underlying NUMA architecture, NUMA initialization
 * fails, or NUMA is disabled on the command line.
 *
 * Must online at least one node and add memory blocks that cover all
 * allowed memory. This function must not fail.
 */
static int __init dummy_numa_init(void)
{
	printk(KERN_INFO "%s\n",
	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
	printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n",
	       0LLU, PFN_PHYS(max_pfn) - 1);

	node_set(0, numa_nodes_parsed);
	numa_add_memblk(0, 0, PFN_PHYS(max_pfn));

	return 0;
}

/**
 * x86_numa_init - Initialize NUMA
 *
 * Try each configured NUMA initialization method until one succeeds. The
 * last fallback is a dummy single-node config encompassing all of memory,
 * which never fails.
 */
void __init x86_numa_init(void)
{
	if (!numa_off) {
#ifdef CONFIG_ACPI_NUMA
		if (!numa_init(x86_acpi_numa_init))
			return;
#endif
#ifdef CONFIG_AMD_NUMA
		if (!numa_init(amd_numa_init))
			return;
#endif
	}

	numa_init(dummy_numa_init);
}

static void __init init_memory_less_node(int nid)
{
	/* Allocate and initialize node data. Memory-less node is now online. */
	alloc_node_data(nid);
	free_area_init_memoryless_node(nid);

	/*
	 * All zonelists will be built later in start_kernel() after per cpu
	 * areas are initialized.
	 */
}

/*
 * A node may exist which has one or more Generic Initiators but no CPUs and no
 * memory.
 *
 * This function must be called after init_cpu_to_node(), to ensure that any
 * memoryless CPU nodes have already been brought online, and before the
 * node_data[nid] is needed for zone list setup in build_all_zonelists().
 *
 * When this function is called, any nodes containing either memory and/or CPUs
 * will already be online and there is no need to do anything extra, even if
 * they also contain one or more Generic Initiators.
 */
void __init init_gi_nodes(void)
{
	int nid;

	for_each_node_state(nid, N_GENERIC_INITIATOR)
		if (!node_online(nid))
			init_memory_less_node(nid);
}

/*
 * Set up early cpu_to_node.
 *
 * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] and
 * apicid_to_node[] tables have valid entries for a CPU.
 * This means we skip cpu_to_node[] initialisation for NUMA
 * emulation and the fake-node case (when running a kernel compiled
 * for NUMA on a non-NUMA box), which is OK as cpu_to_node[]
 * is already initialized in a round-robin manner in numa_init_array(),
 * prior to this call, and that initialization is good enough
 * for the fake NUMA cases.
 *
 * Called before the per_cpu areas are set up.
 */
void __init init_cpu_to_node(void)
{
	int cpu;
	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

	BUG_ON(cpu_to_apicid == NULL);

	for_each_possible_cpu(cpu) {
		int node = numa_cpu_node(cpu);

		if (node == NUMA_NO_NODE)
			continue;

		if (!node_online(node))
			init_memory_less_node(node);

		numa_set_node(cpu, node);
	}
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) # ifndef CONFIG_NUMA_EMU
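^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)  * Non-debug variants: update the node's cpumask directly from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)  * early_cpu_to_node(), with no validation or logging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)  */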
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) void numa_add_cpu(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) void numa_remove_cpu(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) # endif /* !CONFIG_NUMA_EMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) #else /* !CONFIG_DEBUG_PER_CPU_MAPS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)
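^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)  * Debug version of cpu_to_node(): if the early per-CPU map is still in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)  * use, warn with a backtrace and answer from that map; otherwise read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)  * the regular per-CPU variable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824)  */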
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) int __cpu_to_node(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) printk(KERN_WARNING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) "cpu_to_node(%d): usage too early!\n", cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) return per_cpu(x86_cpu_to_node_map, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) EXPORT_SYMBOL(__cpu_to_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)  * Same function as cpu_to_node() but used if called before the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)  * per_cpu areas are set up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) int early_cpu_to_node(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) if (early_per_cpu_ptr(x86_cpu_to_node_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (!cpu_possible(cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) printk(KERN_WARNING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) return NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) return per_cpu(x86_cpu_to_node_map, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
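^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)  * Set or clear @cpu in @node's cpumask, with sanity checks and a debug
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)  * log of the resulting mask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)  */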
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) void debug_cpumask_set_cpu(int cpu, int node, bool enable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) struct cpumask *mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) if (node == NUMA_NO_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) /* early_cpu_to_node() already emits a warning and trace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) mask = node_to_cpumask_map[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) if (!mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) pr_err("node_to_cpumask_map[%i] NULL\n", node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) if (enable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) cpumask_set_cpu(cpu, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) cpumask_clear_cpu(cpu, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) enable ? "numa_add_cpu" : "numa_remove_cpu",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) cpu, node, cpumask_pr_args(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) # ifndef CONFIG_NUMA_EMU
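^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /* Debug variants: funnel through debug_cpumask_set_cpu() above. */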
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) static void numa_set_cpumask(int cpu, bool enable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) void numa_add_cpu(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) numa_set_cpumask(cpu, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) void numa_remove_cpu(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) numa_set_cpumask(cpu, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) # endif /* !CONFIG_NUMA_EMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) * Returns a pointer to the bitmask of CPUs on Node 'node'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) const struct cpumask *cpumask_of_node(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) if ((unsigned)node >= nr_node_ids) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) printk(KERN_WARNING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) node, nr_node_ids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return cpu_none_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) if (node_to_cpumask_map[node] == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) printk(KERN_WARNING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) "cpumask_of_node(%d): no node_to_cpumask_map!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return cpu_online_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) return node_to_cpumask_map[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) EXPORT_SYMBOL(cpumask_of_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) #ifdef CONFIG_NUMA_KEEP_MEMINFO
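^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)  * Return the node ID of the numa_meminfo block containing @start, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)  * NUMA_NO_NODE if no block covers that address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)  */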
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) for (i = 0; i < mi->nr_blks; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (mi->blk[i].start <= start && mi->blk[i].end > start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) return mi->blk[i].nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) return NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) int phys_to_target_node(phys_addr_t start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) int nid = meminfo_to_nid(&numa_meminfo, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) 	 * Prefer online nodes, but if reserved memory might be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) 	 * hot-added, continue the search with the reserved ranges.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) if (nid != NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) return nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) return meminfo_to_nid(&numa_reserved_meminfo, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) EXPORT_SYMBOL_GPL(phys_to_target_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)
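^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)  * Map the physical address of hot-added memory to a node ID, falling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)  * back to the node of the first recorded memory block when the address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)  * is not covered by any known range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)  */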
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) int memory_add_physaddr_to_nid(u64 start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) int nid = meminfo_to_nid(&numa_meminfo, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (nid == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) nid = numa_meminfo.blk[0].nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) return nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) #endif