srat, x86: add support for nodes spanning other nodes
For example, if the physical address layout on a two-node system with 8 GB of memory is something like:

node 0: 0-2GB, 4-6GB
node 1: 2-4GB, 6-8GB

current kernels fail to boot or to detect this NUMA topology. ACPI SRAT tables can expose such a topology, which needs to be supported.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
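To make the interleaved layout above concrete, here is a small standalone C sketch of the idea the patch implements. This is userspace illustration only, not kernel code: the memblk struct, the 1 GB granularity, and all names are assumptions made for the example. The point is that the address-to-node map is populated per memory block, each block carrying its own node id, so both of node 0's disjoint ranges and both of node 1's resolve to the right node.

#include <stdio.h>
#include <stdint.h>

struct memblk {
	uint64_t start, end;	/* physical range [start, end) */
	int nid;		/* node that owns this block */
};

#define GB    (1ULL << 30)
#define SHIFT 30		/* 1 GB granularity for this toy map */

static int memnodemap[8];	/* covers 0-8 GB at 1 GB granularity */

int main(void)
{
	/* The layout from the commit message: each node owns two
	 * non-adjacent 2 GB ranges. */
	struct memblk blks[] = {
		{ 0 * GB, 2 * GB, 0 },
		{ 2 * GB, 4 * GB, 1 },
		{ 4 * GB, 6 * GB, 0 },
		{ 6 * GB, 8 * GB, 1 },
	};
	size_t i;
	uint64_t addr;

	/* Analogous to walking memory blocks with a per-block node id
	 * instead of assuming one contiguous range per node. */
	for (i = 0; i < sizeof(blks) / sizeof(blks[0]); i++)
		for (addr = blks[i].start; addr < blks[i].end; addr += 1ULL << SHIFT)
			memnodemap[addr >> SHIFT] = blks[i].nid;

	/* 5 GB lies between node 1's lowest and highest addresses,
	 * yet it belongs to node 0. */
	for (addr = 1 * GB; addr < 8 * GB; addr += 2 * GB)
		printf("addr %llu GB -> node %d\n",
		       (unsigned long long)(addr / GB), memnodemap[addr >> SHIFT]);

	return 0;
}

Compiled with any C compiler, this prints node 0, 1, 0, 1 for addresses at 1, 3, 5 and 7 GB, matching the layout in the commit message.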
parent 8705a49c35
commit 6ec6e0d9f2
5 changed files with 44 additions and 18 deletions
arch/x86/Kconfig
@@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA
 	help
 	  Enable ACPI SRAT based node topology detection.
 
+# Some NUMA nodes have memory ranges that span
+# other nodes. Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node. See memmap_init_zone()
+# for details.
+config NODES_SPAN_OTHER_NODES
+	def_bool y
+	depends on X86_64_ACPI_NUMA
+
 config NUMA_EMU
 	bool "NUMA emulation"
 	depends on X86_64 && NUMA
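The new Kconfig comment points at memmap_init_zone(). Roughly, when NODES_SPAN_OTHER_NODES is set, the per-pfn memmap initialisation re-checks which node a pfn really belongs to (an early_pfn_in_nid()-style check) and skips pfns that fall inside another node's portion of the span. The following standalone C sketch illustrates the effect for the example layout; the nid_of() helper, PAGE_SHIFT value, and loop are illustrative assumptions, not the kernel's actual code.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define GB (1ULL << 30)

/* Owning node in the example layout: 0-2GB -> 0, 2-4GB -> 1,
 * 4-6GB -> 0, 6-8GB -> 1 (the node id alternates every 2 GB). */
static int nid_of(uint64_t addr)
{
	return (int)((addr >> 31) & 1);
}

int main(void)
{
	uint64_t pfn, end_pfn = (6 * GB) >> PAGE_SHIFT;	/* node 0's pfn span is 0-6 GB */
	uint64_t inited = 0, skipped = 0;

	for (pfn = 0; pfn < end_pfn; pfn++) {
		if (nid_of(pfn << PAGE_SHIFT) != 0) {
			skipped++;	/* pfn is valid but lives on node 1 */
			continue;
		}
		inited++;		/* here the kernel would init node 0's struct page */
	}
	printf("node 0 span [0, 6 GB): %llu pages initialised, %llu skipped\n",
	       (unsigned long long)inited, (unsigned long long)skipped);
	return 0;
}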
arch/x86/mm/k8topology_64.c
@@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
 	if (!found)
 		return -1;
 
-	memnode_shift = compute_hash_shift(nodes, 8);
+	memnode_shift = compute_hash_shift(nodes, 8, NULL);
 	if (memnode_shift < 0) {
 		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
 		return -1;
arch/x86/mm/numa_64.c
@@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size;
  * -1 if node overlap or lost ram (shift too big)
  */
 static int __init populate_memnodemap(const struct bootnode *nodes,
-				      int numnodes, int shift)
+				      int numnodes, int shift, int *nodeids)
 {
 	unsigned long addr, end;
 	int i, res = -1;
@@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
 		do {
 			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
 				return -1;
-			memnodemap[addr >> shift] = i;
+
+			if (!nodeids)
+				memnodemap[addr >> shift] = i;
+			else
+				memnodemap[addr >> shift] = nodeids[i];
+
 			addr += (1UL << shift);
 		} while (addr < end);
 		res = 1;
@@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
 	return i;
 }
 
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
+int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
+			      int *nodeids)
 {
 	int shift;
 
@@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
 	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
 		shift);
 
-	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+	if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
 		printk(KERN_INFO "Your memory is not aligned you need to "
 			"rebuild your kernel with a bigger NODEMAPSIZE "
 			"shift=%d\n", shift);
@@ -462,7 +468,7 @@ done:
 		}
 	}
 out:
-	memnode_shift = compute_hash_shift(nodes, num_nodes);
+	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
 	if (memnode_shift < 0) {
 		memnode_shift = 0;
 		printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
arch/x86/mm/srat_64.c
@@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES];
 static int found_add_area __initdata;
 int hotadd_percent __initdata = 0;
 
+static int num_node_memblks __initdata;
+static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
+static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
+
 /* Too small nodes confuse the VM badly. Usually they result
    from BIOS bugs. */
 #define NODE_MIN_SIZE (4*1024*1024)
@@ -41,17 +45,17 @@ static __init int setup_node(int pxm)
 	return acpi_map_pxm_to_node(pxm);
 }
 
-static __init int conflicting_nodes(unsigned long start, unsigned long end)
+static __init int conflicting_memblks(unsigned long start, unsigned long end)
 {
 	int i;
-	for_each_node_mask(i, nodes_parsed) {
-		struct bootnode *nd = &nodes[i];
+	for (i = 0; i < num_node_memblks; i++) {
+		struct bootnode *nd = &node_memblk_range[i];
 		if (nd->start == nd->end)
 			continue;
 		if (nd->end > start && nd->start < end)
-			return i;
+			return memblk_nodeid[i];
 		if (nd->end == end && nd->start == start)
-			return i;
+			return memblk_nodeid[i];
 	}
 	return -1;
 }
@@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		bad_srat();
 		return;
 	}
-	i = conflicting_nodes(start, end);
+	i = conflicting_memblks(start, end);
 	if (i == node) {
 		printk(KERN_WARNING
 		"SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
@@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		nd->end = end;
 	}
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
-	       nd->start, nd->end);
-	e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
-				     nd->end >> PAGE_SHIFT);
+	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+	       start, end);
+	e820_register_active_regions(node, start >> PAGE_SHIFT,
+				     end >> PAGE_SHIFT);
 	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
 			     nd->end >> PAGE_SHIFT);
 
@@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		if ((nd->start | nd->end) == 0)
 			node_clear(node, nodes_parsed);
 	}
+
+	node_memblk_range[num_node_memblks].start = start;
+	node_memblk_range[num_node_memblks].end = end;
+	memblk_nodeid[num_node_memblks] = node;
+	num_node_memblks++;
 }
 
 /* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		return -1;
 	}
 
-	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
+	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
+					   memblk_nodeid);
 	if (memnode_shift < 0) {
 		printk(KERN_ERR
 		  "SRAT: No NUMA node hash function found. Contact maintainer\n");
include/asm-x86/numa_64.h
@@ -9,7 +9,8 @@ struct bootnode {
 	u64 end;
 };
 
-extern int compute_hash_shift(struct bootnode *nodes, int numnodes);
+extern int compute_hash_shift(struct bootnode *nodes, int numblks,
+			      int *nodeids);
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))