^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (c) 2001 Intel Corp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright (c) 2002 NEC Corp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright (c) 2004 Silicon Graphics, Inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Russ Anderson <rja@sgi.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Jesse Barnes <jbarnes@sgi.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * Jack Steiner <steiner@sgi.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Platform initialization for Discontig Memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/nmi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/memblock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/acpi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/efi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/nodemask.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <asm/tlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <asm/meminit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <asm/numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <asm/sections.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
/*
 * Track per-node information needed to setup the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
	/* Virtual address of this node's ia64_node_data; set by fill_pernode(). */
	struct ia64_node_data *node_data;
	/* Physical address of the per-node area; 0 until fill_pernode() runs. */
	unsigned long pernode_addr;
	/* Size in bytes of the per-node area, as passed to fill_pernode(). */
	unsigned long pernode_size;
	/* Lowest granule-rounded pfn seen for this node (build_node_maps()). */
	unsigned long min_pfn;
	/* Highest granule-rounded end pfn seen for this node (build_node_maps()). */
	unsigned long max_pfn;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
/* Early boot bookkeeping, indexed by node id. */
static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
/*
 * Nodes in this mask are skipped when reserving per-node space and when
 * choosing a donor node in memory_less_node_alloc(); memory_less_nodes()
 * iterates over it to give those nodes a per-node area.
 */
static nodemask_t memory_less_mask __initdata;

/*
 * Per-node pg_data_t pointers, filled in by fill_pernode().  A non-NULL
 * entry is what scatter_node_data() uses to decide a node has been set up.
 */
pg_data_t *pgdat_list[MAX_NUMNODES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 *
 * NODEDATA_ALIGN() rounds @addr up to the next 1MB boundary and then adds a
 * per-node offset of node * PERCPU_PAGE_SIZE, wrapped to stay below
 * MAX_NODE_ALIGN_OFFSET (32MB).
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)					\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) +			\
	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * build_node_maps - callback to setup mem_data structs for each node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * @start: physical start of range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * @len: length of range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * @node: node where this range resides
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * Detect extents of each piece of memory that we wish to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * treat as a virtually contiguous block (i.e. each node). Each such block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * if necessary. Any non-existent pages will simply be part of the virtual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * memmap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) static int __init build_node_maps(unsigned long start, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) unsigned long spfn, epfn, end = start + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) if (!mem_data[node].min_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) mem_data[node].min_pfn = spfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) mem_data[node].max_pfn = epfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * early_nr_cpus_node - return number of cpus on a given node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * @node: node to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) * Count the number of cpus on @node. We can't use nr_cpus_node() yet because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) * called yet. Note that node 0 will also count all non-existent cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) static int early_nr_cpus_node(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) int cpu, n = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) for_each_possible_early_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) if (node == node_cpuid[cpu].nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) n++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) return n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * compute_pernodesize - compute size of pernode data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * @node: the node id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) static unsigned long compute_pernodesize(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) unsigned long pernodesize = 0, cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) cpus = early_nr_cpus_node(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) pernodesize += PERCPU_PAGE_SIZE * cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) pernodesize += node * L1_CACHE_BYTES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) pernodesize = PAGE_ALIGN(pernodesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) return pernodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
/**
 * per_cpu_node_setup - setup per-cpu areas on each node
 * @cpu_data: per-cpu area on this node (passed through __va() before use)
 * @node: node to setup
 *
 * For every early-possible cpu that belongs to @node, copy the static
 * per-cpu data into the next PERCPU_PAGE_SIZE slot of @cpu_data and record
 * the resulting __per_cpu_offset.  Without CONFIG_SMP nothing is done.
 *
 * Return: pointer just past the last slot used (@cpu_data itself if no cpu
 * matched or on !SMP).
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		void *src, *dst;

		if (node_cpuid[cpu].nid != node)
			continue;

		/* cpu0 is copied from its own image, all others from the template */
		src = cpu ? __phys_per_cpu_start : __cpu0_per_cpu;
		dst = __va(cpu_data);

		memcpy(dst, src, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)dst - __per_cpu_start;

		/*
		 * The percpu area for cpu0 is moved out of the __init area
		 * that head.S set up and that has been in use until now, so
		 * ar.k3 must be updated.  This move ensures that the percpu
		 * area for cpu0 is on the correct node and that its virtual
		 * address isn't insanely far from the other percpu areas,
		 * which is important for the congruent percpu allocator.
		 */
		if (!cpu)
			ia64_set_kr(IA64_KR_PER_CPU_DATA,
				    (unsigned long)cpu_data -
				    (unsigned long)__per_cpu_start);

		/* advance to the next cpu's slot */
		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * setup_per_cpu_areas - setup percpu areas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) * Arch code has already allocated and initialized percpu areas. All
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) * this function has to do is to teach the determined layout to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) * dynamic percpu allocator, which happens to be more complex than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) * creating whole new ones using helpers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) void __init setup_per_cpu_areas(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) struct pcpu_alloc_info *ai;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) struct pcpu_group_info *gi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) unsigned int *cpu_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) void *base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) unsigned long base_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) unsigned int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) ssize_t static_size, reserved_size, dyn_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) int node, prev_node, unit, nr_units;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) if (!ai)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) panic("failed to allocate pcpu_alloc_info");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) cpu_map = ai->groups[0].cpu_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) /* determine base */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) base = (void *)ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) base = min(base,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) base_offset = (void *)__per_cpu_start - base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) /* build cpu_map, units are grouped by node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) unit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) for_each_possible_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) if (node == node_cpuid[cpu].nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) cpu_map[unit++] = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) nr_units = unit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) /* set basic parameters */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) static_size = __per_cpu_end - __per_cpu_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) reserved_size = PERCPU_MODULE_RESERVE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) if (dyn_size < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) panic("percpu area overflow static=%zd reserved=%zd\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) static_size, reserved_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) ai->static_size = static_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) ai->reserved_size = reserved_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) ai->dyn_size = dyn_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) ai->unit_size = PERCPU_PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) ai->atom_size = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) ai->alloc_size = PERCPU_PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * CPUs are put into groups according to node. Walk cpu_map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * and create new groups at node boundaries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) prev_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) ai->nr_groups = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) for (unit = 0; unit < nr_units; unit++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) cpu = cpu_map[unit];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) node = node_cpuid[cpu].nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) if (node == prev_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) gi->nr_units++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) prev_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) gi = &ai->groups[ai->nr_groups++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) gi->nr_units = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) gi->base_offset = __per_cpu_offset[cpu] + base_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) gi->cpu_map = &cpu_map[unit];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) pcpu_setup_first_chunk(ai, base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) pcpu_free_alloc_info(ai);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * fill_pernode - initialize pernode data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * @node: the node id.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * @pernode: physical address of pernode data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * @pernodesize: size of the pernode data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) static void __init fill_pernode(int node, unsigned long pernode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) unsigned long pernodesize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) void *cpu_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) int cpus = early_nr_cpus_node(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) mem_data[node].pernode_addr = pernode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) mem_data[node].pernode_size = pernodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) memset(__va(pernode), 0, pernodesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) cpu_data = (void *)pernode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) pernode += PERCPU_PAGE_SIZE * cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) pernode += node * L1_CACHE_BYTES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) pgdat_list[node] = __va(pernode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) mem_data[node].node_data = __va(pernode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) cpu_data = per_cpu_node_setup(cpu_data, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) * find_pernode_space - allocate memory for memory map and per-node structures
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) * @start: physical start of range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) * @len: length of range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) * @node: node where this range resides
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) * This routine reserves space for the per-cpu data struct, the list of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) * pg_data_ts and the per-node data struct. Each node will have something like
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) * the following in the first chunk of addr. space large enough to hold it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) * ________________________
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) * | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) * | PERCPU_PAGE_SIZE * | start and length big enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * |------------------------|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) * | local pg_data_t * |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * |------------------------|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) * | local ia64_node_data |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) * |------------------------|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) * | ??? |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) * |________________________|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) * Once this space has been set aside, the bootmem maps are initialized. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) * could probably move the allocation of the per-cpu and ia64_node_data space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) * outside of this function and use alloc_bootmem_node(), but doing it here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) * is straightforward and we get the alignments we want so...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) static int __init find_pernode_space(unsigned long start, unsigned long len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) unsigned long spfn, epfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) unsigned long pernodesize = 0, pernode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) spfn = start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) epfn = (start + len) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) * Make sure this memory falls within this node's usable memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) * since we may have thrown some away in build_maps().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) /* Don't setup this node's local space twice... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) if (mem_data[node].pernode_addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) * Calculate total size needed, incl. what's necessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * for good alignment and alias prevention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) pernodesize = compute_pernodesize(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) pernode = NODEDATA_ALIGN(start, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) /* Is this range big enough for what we want to store here? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) if (start + len > (pernode + pernodesize))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) fill_pernode(node, pernode, pernodesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) * reserve_pernode_space - reserve memory for per-node space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) * Reserve the space used by the bootmem maps & per-node space in the boot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) * allocator so that when we actually create the real mem maps we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) * use their memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) static void __init reserve_pernode_space(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) unsigned long base, size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) if (node_isset(node, memory_less_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) /* Now the per-node space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) size = mem_data[node].pernode_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) base = __pa(mem_data[node].pernode_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) memblock_reserve(base, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) static void scatter_node_data(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) pg_data_t **dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) * for_each_online_node() can't be used at here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) * node_online_map is not set for hot-added nodes at this time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * because we are halfway through initialization of the new node's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) * structures. If for_each_online_node() is used, a new node's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) * pg_data_ptrs will be not initialized. Instead of using it,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) * pgdat_list[] is checked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) if (pgdat_list[node]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) memcpy(dst, pgdat_list, sizeof(pgdat_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) * initialize_pernode_data - fixup per-cpu & per-node pointers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) * Each node's per-node area has a copy of the global pg_data_t list, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) * we copy that to each node here, as well as setting the per-cpu pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) * to the local node data structure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) static void __init initialize_pernode_data(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) int cpu, node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) scatter_node_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) /* Set the node_data pointer for each per-cpu struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) for_each_possible_early_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) node = node_cpuid[cpu].nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) per_cpu(ia64_cpu_info, cpu).node_data =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) mem_data[node].node_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) struct cpuinfo_ia64 *cpu0_cpu_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) cpu = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) node = node_cpuid[cpu].nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) ((char *)&ia64_cpu_info - __per_cpu_start));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) cpu0_cpu_info->node_data = mem_data[node].node_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) * node but fall back to any other node when __alloc_bootmem_node fails
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) * for best.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) * @nid: node id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) * @pernodesize: size of this node's pernode data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) void *ptr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) u8 best = 0xff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) int bestnode = NUMA_NO_NODE, node, anynode = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) if (node_isset(node, memory_less_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) else if (node_distance(nid, node) < best) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) best = node_distance(nid, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) bestnode = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) anynode = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) if (bestnode == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) bestnode = anynode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) __pa(MAX_DMA_ADDRESS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) MEMBLOCK_ALLOC_ACCESSIBLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) bestnode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) if (!ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) __pa(MAX_DMA_ADDRESS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) return ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) * memory_less_nodes - allocate and initialize CPU only nodes pernode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) * information.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) static void __init memory_less_nodes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) unsigned long pernodesize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) void *pernode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) for_each_node_mask(node, memory_less_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) pernodesize = compute_pernodesize(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) pernode = memory_less_node_alloc(node, pernodesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) fill_pernode(node, __pa(pernode), pernodesize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) * find_memory - walk the EFI memory map and setup the bootmem allocator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) * Called early in boot to setup the bootmem allocator, and to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * allocate the per-cpu and per-node structures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) void __init find_memory(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) reserve_memory();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) efi_memmap_walk(filter_memory, register_active_ranges);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) if (num_online_nodes() == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) printk(KERN_ERR "node info missing!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) node_set_online(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) nodes_or(memory_less_mask, memory_less_mask, node_online_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) min_low_pfn = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) max_low_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) /* These actually end up getting called by call_pernode_memory() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) efi_memmap_walk(filter_rsvd_memory, build_node_maps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) efi_memmap_walk(find_max_min_low_pfn, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) for_each_online_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) if (mem_data[node].min_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) node_clear(node, memory_less_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) reserve_pernode_space();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) memory_less_nodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) initialize_pernode_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) max_pfn = max_low_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) find_initrd();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) * per_cpu_init - setup per-cpu variables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) * find_pernode_space() does most of this already, we just need to set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) * local_per_cpu_offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) void *per_cpu_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) static int first_time = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) if (first_time) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) first_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) for_each_possible_early_cpu(cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) * call_pernode_memory - use SRAT to call callback functions with node info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) * @start: physical start of range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) * @len: length of range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) * @arg: function to call for each range
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) * out to which node a block of memory belongs. Ignore memory that we cannot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) * identify, and split blocks that run across multiple nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) * Take this opportunity to round the start address up and the end address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) * down to page boundaries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) unsigned long rs, re, end = start + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) void (*func)(unsigned long, unsigned long, int);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) start = PAGE_ALIGN(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) end &= PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) if (start >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) func = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) if (!num_node_memblks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) /* No SRAT table, so assume one node (node 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) if (start < end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) (*func)(start, end - start, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) for (i = 0; i < num_node_memblks; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) rs = max(start, node_memblk[i].start_paddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) re = min(end, node_memblk[i].start_paddr +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) node_memblk[i].size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) if (rs < re)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) (*func)(rs, re - rs, node_memblk[i].nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) if (re == end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) * paging_init - setup page tables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) * paging_init() sets up the page tables for each node of the system and frees
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) * the bootmem allocator memory for general use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) void __init paging_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) unsigned long max_dma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) unsigned long pfn_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) unsigned long max_pfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) unsigned long max_zone_pfns[MAX_NR_ZONES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) sparse_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) #ifdef CONFIG_VIRTUAL_MEM_MAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) sizeof(struct page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) vmem_map = (struct page *) VMALLOC_END;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) efi_memmap_walk(create_mem_map_page_table, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) printk("Virtual mem_map starts at 0x%p\n", vmem_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) pfn_offset = mem_data[node].min_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) #ifdef CONFIG_VIRTUAL_MEM_MAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) if (mem_data[node].max_pfn > max_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) max_pfn = mem_data[node].max_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) #ifdef CONFIG_ZONE_DMA32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) max_zone_pfns[ZONE_DMA32] = max_dma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) max_zone_pfns[ZONE_NORMAL] = max_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) free_area_init(max_zone_pfns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) #ifdef CONFIG_MEMORY_HOTPLUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) pg_data_t *arch_alloc_nodedata(int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) unsigned long size = compute_pernodesize(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) return kzalloc(size, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) void arch_free_nodedata(pg_data_t *pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) kfree(pgdat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) pgdat_list[update_node] = update_pgdat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) scatter_node_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) #ifdef CONFIG_SPARSEMEM_VMEMMAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) struct vmem_altmap *altmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) return vmemmap_populate_basepages(start, end, node, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
/*
 * vmemmap_free - counterpart of vmemmap_populate()
 *
 * Empty: nothing is released here.
 */
void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) #endif