From 9844ab11c763bfed9f054c82366b19dcda66aca9 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Wed, 14 Oct 2009 00:07:03 +0400
Subject: [PATCH 01/30] x86, apic: Introduce the NOOP apic driver

Introduce NOOP APIC driver. We should use it in case if apic was
disabled due to hardware of software/firmware problems (including
user requested to disable it case).

The driver is attempting to catch any inappropriate apic operation
call with warning issue.

Also it is possible to use some apic operation like IPI calls,
read/write without checking for apic presence which should make
callers code easier.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: yinghai@kernel.org
Cc: macro@linux-mips.org
LKML-Reference: <20091013201022.534682104@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apic.h      |   2 +
 arch/x86/kernel/apic/Makefile    |   2 +-
 arch/x86/kernel/apic/apic_noop.c | 194 +++++++++++++++++++++++++++++++
 3 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/apic/apic_noop.c

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 474d80d3e6c..08a5f420e07 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void)
 
 extern void default_setup_apic_routing(void);
 
+extern struct apic apic_noop;
+
 #ifdef CONFIG_X86_32
 
 extern struct apic apic_default;
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index da7b7b9f8bd..565c1bfc507 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,7 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
new file mode 100644
index 00000000000..0b93ec2fde0
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -0,0 +1,194 @@
+/*
+ * NOOP APIC driver.
+ *
+ * Does almost nothing and should be substituted by a real apic driver via
+ * probe routine.
+ *
+ * Though in case if apic is disabled (for some reason) we try
+ * to not uglify the caller's code and allow to call (some) apic routines
+ * like self-ipi, etc... and issue a warning if an operation is not allowed
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+/*
+ * some operations should never be reached with
+ * noop apic if it's not turned off, this mostly
+ * means the caller forgot to disable apic (or
+ * check the apic presence) before doing a call
+ */
+static void warn_apic_enabled(void)
+{
+	WARN_ONCE((cpu_has_apic || !disable_apic),
+		"APIC: Called for NOOP operation with apic enabled\n");
+}
+
+/*
+ * To check operations but do not bloat source code
+ */
+#define NOOP_FUNC(func)			func { warn_apic_enabled(); }
+#define NOOP_FUNC_RET(func, ret)	func { warn_apic_enabled(); return ret; }
+
+NOOP_FUNC(static void noop_init_apic_ldr(void))
+NOOP_FUNC(static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector))
+NOOP_FUNC(static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector))
+NOOP_FUNC(static void noop_send_IPI_allbutself(int vector))
+NOOP_FUNC(static void noop_send_IPI_all(int vector))
+NOOP_FUNC(static void noop_send_IPI_self(int vector))
+NOOP_FUNC_RET(static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip), -1)
+NOOP_FUNC(static void noop_apic_write(u32 reg, u32 v))
+NOOP_FUNC(void noop_apic_wait_icr_idle(void))
+NOOP_FUNC_RET(static u32 noop_safe_apic_wait_icr_idle(void), 0)
+NOOP_FUNC_RET(static u64 noop_apic_icr_read(void), 0)
+NOOP_FUNC(static void noop_apic_icr_write(u32 low, u32 id))
+NOOP_FUNC_RET(static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map), phys_map)
+NOOP_FUNC_RET(static int noop_cpu_to_logical_apicid(int cpu), 1)
+NOOP_FUNC_RET(static int noop_default_phys_pkg_id(int cpuid_apic, int index_msb), 0)
+NOOP_FUNC_RET(static unsigned int noop_get_apic_id(unsigned long x), 0)
+
+static int noop_probe(void)
+{
+	/* should not ever be enabled this way */
+	return 0;
+}
+
+static int noop_apic_id_registered(void)
+{
+	warn_apic_enabled();
+	return physid_isset(read_apic_id(), phys_cpu_present_map);
+}
+
+static const struct cpumask *noop_target_cpus(void)
+{
+	warn_apic_enabled();
+
+	/* only BSP here */
+	return cpumask_of(0);
+}
+
+static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+	warn_apic_enabled();
+	return physid_isset(apicid, bitmap);
+}
+
+static unsigned long noop_check_apicid_present(int bit)
+{
+	warn_apic_enabled();
+	return physid_isset(bit, phys_cpu_present_map);
+}
+
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+	warn_apic_enabled();
+	if (cpu != 0)
+		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
+	cpumask_clear(retmask);
+	cpumask_set_cpu(cpu, retmask);
+}
+
+int noop_apicid_to_node(int logical_apicid)
+{
+	warn_apic_enabled();
+
+	/* we're always on node 0 */
+	return 0;
+}
+
+static u32 noop_apic_read(u32 reg)
+{
+	/*
+	 * noop-read is always safe until we have
+	 * non-disabled unit
+	 */
+	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+	return 0;
+}
+
+struct apic apic_noop = {
+	.name				= "noop",
+	.probe				= noop_probe,
+	.acpi_madt_oem_check		= NULL,
+
+	.apic_id_registered		= noop_apic_id_registered,
+
+	.irq_delivery_mode		= dest_LowestPrio,
+	/* logical delivery broadcast to all CPUs: */
+	.irq_dest_mode			= 1,
+
+	.target_cpus			= noop_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= APIC_DEST_LOGICAL,
+	.check_apicid_used		= noop_check_apicid_used,
+	.check_apicid_present		= noop_check_apicid_present,
+
+	.vector_allocation_domain	= noop_vector_allocation_domain,
+	.init_apic_ldr			= noop_init_apic_ldr,
+
+	.ioapic_phys_id_map		= noop_ioapic_phys_id_map,
+	.setup_apic_routing		= NULL,
+	.multi_timer_check		= NULL,
+	.apicid_to_node			= noop_apicid_to_node,
+
+	.cpu_to_logical_apicid		= noop_cpu_to_logical_apicid,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= default_apicid_to_cpu_present,
+
+	.setup_portio_remap		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.enable_apic_mode		= NULL,
+
+	.phys_pkg_id			= noop_default_phys_pkg_id,
+
+	.mps_oem_check			= NULL,
+
+	.get_apic_id			= noop_get_apic_id,
+	.set_apic_id			= NULL,
+	.apic_id_mask			= 0x0F << 24,
+
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= noop_send_IPI_mask,
+	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= noop_send_IPI_allbutself,
+	.send_IPI_all			= noop_send_IPI_all,
+	.send_IPI_self			= noop_send_IPI_self,
+
+	.wakeup_secondary_cpu		= noop_wakeup_secondary_cpu,
+
+	/* should be safe */
+	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
+	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
+
+	.wait_for_init_deassert		= NULL,
+
+	.smp_callin_clear_local_apic	= NULL,
+	.inquire_remote_apic		= NULL,
+
+	.read				= noop_apic_read,
+	.write				= noop_apic_write,
+	.icr_read			= noop_apic_icr_read,
+	.icr_write			= noop_apic_icr_write,
+	.wait_icr_idle			= noop_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= noop_safe_apic_wait_icr_idle,
+};

From a933c61829509eb27083146dda392132baa0969a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Wed, 14 Oct 2009 00:07:04 +0400
Subject: [PATCH 02/30] x86, apic: Use apic noop driver

In case if apic were disabled we may use the whole apic NOOP driver
instead of sparse poking the some functions in apic driver.

Also NOOP would catch any inappropriate apic operation calls (not
just read/write).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: yinghai@kernel.org
Cc: macro@linux-mips.org
LKML-Reference: <20091013201022.747817361@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 894aa97f071..61a5628810d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -241,28 +241,12 @@ static int modern_apic(void)
 }
 
 /*
- * bare function to substitute write operation
- * and it's _that_ fast :)
- */
-static void native_apic_write_dummy(u32 reg, u32 v)
-{
-	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
-}
-
-static u32 native_apic_read_dummy(u32 reg)
-{
-	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
-	return 0;
-}
-
-/*
- * right after this call apic->write/read doesn't do anything
- * note that there is no restore operation it works one way
+ * right after this call apic become NOOP driven
+ * so apic->write/read doesn't do anything
  */
 void apic_disable(void)
 {
-	apic->read = native_apic_read_dummy;
-	apic->write = native_apic_write_dummy;
+	apic = &apic_noop;
 }
 
 void native_apic_wait_icr_idle(void)

From 2626eb2b2fd958dc0f683126aa84e93b939699a1 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Wed, 14 Oct 2009 00:07:05 +0400
Subject: [PATCH 03/30] x86, apic: Limit apic dumping, introduce new
 show_lapic= setup option

In case if a system has a large number of cpus printing apics
contents may consume a long time period.

We limit such an output by 1 apic by default. But to have an
ability to see all apics or some part of them we introduce
"show_lapic" setup option which allow us to limit/unlimit the
number of APICs being dumped.

Example: apic=debug show_lapic=5, or apic=debug show_lapic=all

Also move apic_verbosity checking upper that way so helper routines
do not need to inspect it at all.

Suggested-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: yinghai@kernel.org
Cc: macro@linux-mips.org
LKML-Reference: <20091013201022.926793122@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 47 +++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index dc69f28489f..8c718c93d07 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1599,9 +1599,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	struct irq_desc *desc;
 	unsigned int irq;
 
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 	for (i = 0; i < nr_ioapics; i++)
 		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1708,9 +1705,6 @@ __apicdebuginit(void) print_APIC_field(int base)
 {
 	int i;
 
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
 	printk(KERN_DEBUG);
 
 	for (i = 0; i < 8; i++)
@@ -1724,9 +1718,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	unsigned int i, v, ver, maxlvt;
 	u64 icr;
 
-	if (apic_verbosity == APIC_QUIET)
-		return;
-
 	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
@@ -1824,13 +1815,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 	printk("\n");
 }
 
-__apicdebuginit(void) print_all_local_APICs(void)
+__apicdebuginit(void) print_local_APICs(int maxcpu)
 {
 	int cpu;
 
+	if (!maxcpu)
+		return;
+
 	preempt_disable();
-	for_each_online_cpu(cpu)
+	for_each_online_cpu(cpu) {
+		if (cpu >= maxcpu)
+			break;
 		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	}
 	preempt_enable();
 }
 
@@ -1839,7 +1836,7 @@ __apicdebuginit(void) print_PIC(void)
 	unsigned int v;
 	unsigned long flags;
 
-	if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
+	if (!nr_legacy_irqs)
 		return;
 
 	printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1866,21 +1863,41 @@ __apicdebuginit(void) print_PIC(void)
 	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
 {
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+	} else {
+		get_option(&arg, &num);
+		if (num >= 0)
+			show_lapic = num;
+	}
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
+{
+	if (apic_verbosity == APIC_QUIET)
+		return 0;
+
 	print_PIC();
 
 	/* don't print out if apic is not there */
 	if (!cpu_has_apic && !apic_from_smp_config())
 		return 0;
 
-	print_all_local_APICs();
+	print_local_APICs(show_lapic);
 	print_IO_APIC();
 
 	return 0;
 }
 
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
 
 
 /* Where if anywhere is the i8259 connect in external int mode */

From 6c2c502910247d2820cb630e7b28fb6bdecdbf45 Mon Sep 17 00:00:00 2001
From: Dimitri Sivanich <sivanich@sgi.com>
Date: Wed, 30 Sep 2009 11:02:59 -0500
Subject: [PATCH 04/30] x86: SGI UV: Fix irq affinity for hub based interrupts

This patch fixes handling of uv hub irq affinity.  IRQs with ALL or
NODE affinity can be routed to cpus other than their originally
assigned cpu.  Those with CPU affinity cannot be rerouted.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
LKML-Reference: <20090930160259.GA7822@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_irq.h |  15 +++-
 arch/x86/kernel/apic/io_apic.c   |  49 +++++++++++-
 arch/x86/kernel/uv_irq.c         | 128 +++++++++++++++++++++++++++++--
 drivers/misc/sgi-xp/xpc_uv.c     |   5 +-
 4 files changed, 180 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index 9613c8c0b64..5397e129095 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry {
 		dest		: 32;
 };
 
+enum {
+	UV_AFFINITY_ALL,
+	UV_AFFINITY_NODE,
+	UV_AFFINITY_CPU
+};
+
 extern struct irq_chip uv_irq_chip;
 
-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern int
+arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
 extern void arch_disable_uv_irq(int, unsigned long);
+extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);
 
 #endif /* _ASM_X86_UV_UV_IRQ_H */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8c718c93d07..bb52e7f6e95 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset)
+		       unsigned long mmr_offset, int restrict)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	if (err != 0)
 		return err;
 
+	if (restrict == UV_AFFINITY_CPU)
+		desc->status |= IRQ_NO_BALANCING;
+	else
+		desc->status |= IRQ_MOVE_PCNTXT;
+
 	spin_lock_irqsave(&vector_lock, flags);
 	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
 				      irq_name);
@@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
  * Disable the specified MMR located on the specified blade so that MSIs are
  * longer allowed to be sent.
  */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
 {
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
-	int mmr_pnode;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
 
@@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
 	entry->mask = 1;
 
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 }
+
+int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+	unsigned int dest;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long mmr_offset;
+	unsigned mmr_pnode;
+
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
+		return -1;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode = apic->irq_dest_mode;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = dest;
+
+	/* Get previously stored MMR and pnode of hub sourcing interrupts */
+	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+		return -1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return 0;
+}
 #endif /* CONFIG_X86_64 */
 
 int __init io_apic_get_redir_entries (int ioapic)
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index aeef529917e..9a83775ab0f 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -9,10 +9,22 @@
  */
 
 #include <linux/module.h>
+#include <linux/rbtree.h>
 #include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+	struct rb_node list;
+	unsigned long offset;
+	int pnode;
+	int irq;
+};
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;
 
 static void uv_noop(unsigned int irq)
 {
@@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = {
 	.unmask		= uv_noop,
 	.eoi		= uv_ack_apic,
 	.end		= uv_noop,
+	.set_affinity	= uv_set_irq_affinity,
 };
 
+/*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+	struct rb_node **link = &uv_irq_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct uv_irq_2_mmr_pnode *n;
+	struct uv_irq_2_mmr_pnode *e;
+	unsigned long irqflags;
+
+	n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+				uv_blade_to_memory_nid(blade));
+	if (!n)
+		return -ENOMEM;
+
+	n->irq = irq;
+	n->offset = offset;
+	n->pnode = uv_blade_to_pnode(blade);
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	/* Find the right place in the rbtree: */
+	while (*link) {
+		parent = *link;
+		e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+		if (unlikely(irq == e->irq)) {
+			/* irq entry exists */
+			e->pnode = uv_blade_to_pnode(blade);
+			e->offset = offset;
+			spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+			kfree(n);
+			return 0;
+		}
+
+		if (irq < e->irq)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Insert the node into the rbtree. */
+	rb_link_node(&n->list, parent, link);
+	rb_insert_color(&n->list, &uv_irq_root);
+
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+		if (e->irq == irq) {
+			*offset = e->offset;
+			*pnode = e->pnode;
+			spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+			return 0;
+		}
+
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	return -1;
+}
+
 /*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.
  */
 int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
-		 unsigned long mmr_offset)
+		 unsigned long mmr_offset, int restrict)
 {
-	int irq;
-	int ret;
+	int irq, ret;
+
+	irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
 
-	irq = create_irq();
 	if (irq <= 0)
 		return -EBUSY;
 
-	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
-	if (ret != irq)
+	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+		restrict);
+	if (ret == irq)
+		uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+	else
 		destroy_irq(irq);
 
 	return ret;
@@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
  *
  * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
  */
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
 {
-	arch_disable_uv_irq(mmr_blade, mmr_offset);
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+		if (e->irq == irq) {
+			arch_disable_uv_irq(e->pnode, e->offset);
+			rb_erase(n, &uv_irq_root);
+			kfree(e);
+			break;
+		}
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
 	destroy_irq(irq);
 }
 EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index c76677afda1..b5bbe59f9c5 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
 
 #if defined CONFIG_X86_64
-	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+			UV_AFFINITY_CPU);
 	if (mq->irq < 0) {
 		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
 			-mq->irq);
@@ -136,7 +137,7 @@ static void
 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
 {
 #if defined CONFIG_X86_64
-	uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+	uv_teardown_irq(mq->irq);
 
 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
 	int mmr_pnode;

From 9338ad6ffb70eca97f335d93c54943828c8b209e Mon Sep 17 00:00:00 2001
From: Dimitri Sivanich <sivanich@sgi.com>
Date: Tue, 13 Oct 2009 15:32:36 -0500
Subject: [PATCH 05/30] x86, apic: Move SGI UV functionality out of generic
 IO-APIC code

Move UV specific functionality out of the generic IO-APIC code.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
LKML-Reference: <20091013203236.GD20543@sgi.com>
[ Cleaned up the code some more in their new places. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/hw_irq.h    |  29 +++++--
 arch/x86/include/asm/uv/uv_irq.h |   7 --
 arch/x86/kernel/apic/io_apic.c   | 140 ++-----------------------------
 arch/x86/kernel/uv_irq.c         | 123 +++++++++++++++++++++++++--
 4 files changed, 145 insertions(+), 154 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index ba180d93b08..56f0877c932 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -79,14 +79,31 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
 					int ioapic, int ioapic_pin,
 					int trigger, int polarity)
 {
-	irq_attr->ioapic     = ioapic;
-	irq_attr->ioapic_pin = ioapic_pin;
-	irq_attr->trigger    = trigger;
-	irq_attr->polarity   = polarity;
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
 }
 
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
-					struct io_apic_irq_attr *irq_attr);
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
+struct irq_cfg {
+	struct irq_pin_list	*irq_2_pin;
+	cpumask_var_t		domain;
+	cpumask_var_t		old_domain;
+	unsigned		move_cleanup_count;
+	u8			vector;
+	u8			move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg(unsigned int);
+extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void send_cleanup_vector(struct irq_cfg *);
+extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index 5397e129095..d6b17c76062 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,13 +31,6 @@ enum {
 	UV_AFFINITY_CPU
 };
 
-extern struct irq_chip uv_irq_chip;
-
-extern int
-arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
-extern void arch_disable_uv_irq(int, unsigned long);
-extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
-
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index bb52e7f6e95..ce16b65cfdc 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -60,8 +60,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
 #include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
 
 #include <asm/apic.h>
 
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 	return pin;
 }
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
-	struct irq_pin_list *irq_2_pin;
-	cpumask_var_t domain;
-	cpumask_var_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	struct irq_cfg *cfg = NULL;
 	struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 /* end for move_irq_desc */
 
 #else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
@@ -1237,8 +1221,7 @@ next:
 	return err;
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -2245,7 +2228,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
 {
 	cpumask_var_t cleanup_mask;
 
@@ -2289,15 +2272,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 	}
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
 /*
  * Either sets desc->affinity to a valid value, and returns
  * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
  * leaves desc->affinity untouched.
  */
-static unsigned int
+unsigned int
 set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
@@ -3725,116 +3705,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset, int restrict)
-{
-	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
-	int mmr_pnode;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long flags;
-	int err;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	cfg = irq_cfg(irq);
-
-	err = assign_irq_vector(irq, cfg, eligible_cpu);
-	if (err != 0)
-		return err;
-
-	if (restrict == UV_AFFINITY_CPU)
-		desc->status |= IRQ_NO_BALANCING;
-	else
-		desc->status |= IRQ_MOVE_PCNTXT;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-				      irq_name);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->vector		= cfg->vector;
-	entry->delivery_mode	= apic->irq_delivery_mode;
-	entry->dest_mode	= apic->irq_dest_mode;
-	entry->polarity		= 0;
-	entry->trigger		= 0;
-	entry->mask		= 0;
-	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
-
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
-{
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->mask = 1;
-
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-
-int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg = desc->chip_data;
-	unsigned int dest;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long mmr_offset;
-	unsigned mmr_pnode;
-
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
-		return -1;
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = dest;
-
-	/* Get previously stored MMR and pnode of hub sourcing interrupts */
-	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
-		return -1;
-
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return 0;
-}
-#endif /* CONFIG_X86_64 */
-
 int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 9a83775ab0f..61d805df4c9 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -18,13 +18,16 @@
 
 /* MMR offset and pnode of hub sourcing interrupts for a given irq */
 struct uv_irq_2_mmr_pnode{
-	struct rb_node list;
-	unsigned long offset;
-	int pnode;
-	int irq;
+	struct rb_node		list;
+	unsigned long		offset;
+	int			pnode;
+	int			irq;
 };
-static spinlock_t uv_irq_lock;
-static struct rb_root uv_irq_root;
+
+static spinlock_t		uv_irq_lock;
+static struct rb_root		uv_irq_root;
+
+static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
 static void uv_noop(unsigned int irq)
 {
@@ -131,6 +134,114 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
 	return -1;
 }
 
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+		       unsigned long mmr_offset, int restrict)
+{
+	const struct cpumask *eligible_cpu = cpumask_of(cpu);
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg;
+	int mmr_pnode;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+			sizeof(unsigned long));
+
+	cfg = irq_cfg(irq);
+
+	err = assign_irq_vector(irq, cfg, eligible_cpu);
+	if (err != 0)
+		return err;
+
+	if (restrict == UV_AFFINITY_CPU)
+		desc->status |= IRQ_NO_BALANCING;
+	else
+		desc->status |= IRQ_MOVE_PCNTXT;
+
+	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+				      irq_name);
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+			sizeof(unsigned long));
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	entry->mask = 1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+	unsigned int dest;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long mmr_offset;
+	unsigned mmr_pnode;
+
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
+		return -1;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= dest;
+
+	/* Get previously stored MMR and pnode of hub sourcing interrupts */
+	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+		return -1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return 0;
+}
+
 /*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an

From 7ec13187ef48b04bb7f6dfa266c7271a52d009c2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 14 Oct 2009 15:06:42 +0200
Subject: [PATCH 06/30] x86, apic: Fix prototype in hw_irq.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This warning:

 In file included from arch/x86/include/asm/ipi.h:23,
                  from arch/x86/kernel/apic/apic_noop.c:27:
 arch/x86/include/asm/hw_irq.h:105: warning: ‘struct irq_desc’ declared inside parameter list
 arch/x86/include/asm/hw_irq.h:105: warning: its scope is only this definition or declaration, which is probably not what you want

triggers because irq_desc is defined after hw_irq.h is included
in irq.h. Since it's pointer reference only, a forward declaration
of the type will solve the problem.

LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/hw_irq.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 56f0877c932..1984ce9a13d 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -102,6 +102,8 @@ struct irq_cfg {
 extern struct irq_cfg *irq_cfg(unsigned int);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
 extern void send_cleanup_vector(struct irq_cfg *);
+
+struct irq_desc;
 extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *);
 extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);

From 9636bc0555e3f383c120ddcffe4b7c5c58a10b1a Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Wed, 14 Oct 2009 19:09:04 +0400
Subject: [PATCH 07/30] x86, apic: Explain show_lapic= in kernel parameters
 list

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: yinghai@kernel.org
Cc: macro@linux-mips.org
LKML-Reference: <20091014150904.GA5259@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/kernel-parameters.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9107b387e91..465a786a378 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -345,6 +345,15 @@ and is between 256 and 4096 characters. It is defined in the file
 			Change the amount of debugging information output
 			when initialising the APIC and IO-APIC components.
 
+	show_lapic=	[APIC,X86] Advanced Programmable Interrupt Controller
+			Limit apic dumping. The parameter defines the maximal
+			number of local apics being dumped. Also it is possible
+			to set it to "all" by meaning -- no limit here.
+			Format: { 1 (default) | 2 | ... | all }.
+			The parameter valid if only apic=debug or
+			apic=verbose is specified.
+			Example: apic=debug show_lapic=all
+
 	apm=		[APM] Advanced Power Management
 			See header of arch/x86/kernel/apm_32.c.
 

From f88f2b4fdb1e098433ad2b005b6f7353f7268ce1 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 15 Oct 2009 19:04:16 +0400
Subject: [PATCH 08/30] x86: apic: Allow noop operations to be called almost at
 any time

As only apic noop is used we allow to use almost any operation
caller wants (and which of them noop driver supports of
course).

Initially it was reported by Ingo Molnar that apic noop
issue a warning for pkg id (which is actually false positive
and should be eliminated).

So we save checking (and warning issue) for read/write
operations while allow any other ops to be freely used.

Also:
 - fix noop_cpu_to_logical_apicid, it should be 0.
 - rename noop_default_phys_pkg_id to noop_phys_pkg_id
   (we use default_ prefix for more general routines
    in apic subsystem).

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
LKML-Reference: <20091015150416.GC5331@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c      |   1 +
 arch/x86/kernel/apic/apic_noop.c | 103 +++++++++++++++++--------------
 2 files changed, 58 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 61a5628810d..dce93d4b0ea 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -246,6 +246,7 @@ static int modern_apic(void)
  */
 void apic_disable(void)
 {
+	pr_info("APIC: switched to apic NOOP\n");
 	apic = &apic_noop;
 }
 
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 0b93ec2fde0..9ab6ffb313a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -6,7 +6,7 @@
  *
  * Though in case if apic is disabled (for some reason) we try
  * to not uglify the caller's code and allow to call (some) apic routines
- * like self-ipi, etc... and issue a warning if an operation is not allowed
+ * like self-ipi, etc...
  */
 
 #include <linux/threads.h>
@@ -30,76 +30,88 @@
 #include <asm/acpi.h>
 #include <asm/e820.h>
 
-/*
- * some operations should never be reached with
- * noop apic if it's not turned off, this mostly
- * means the caller forgot to disable apic (or
- * check the apic presence) before doing a call
- */
-static void warn_apic_enabled(void)
+static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_allbutself(int vector) { }
+static void noop_send_IPI_all(int vector) { }
+static void noop_send_IPI_self(int vector) { }
+static void noop_apic_wait_icr_idle(void) { }
+static void noop_apic_icr_write(u32 low, u32 id) { }
+
+static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
 {
-	WARN_ONCE((cpu_has_apic || !disable_apic),
-		"APIC: Called for NOOP operation with apic enabled\n");
+	return -1;
 }
 
-/*
- * To check operations but do not bloat source code
- */
-#define NOOP_FUNC(func)			func { warn_apic_enabled(); }
-#define NOOP_FUNC_RET(func, ret)	func { warn_apic_enabled(); return ret; }
+static u32 noop_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
 
-NOOP_FUNC(static void noop_init_apic_ldr(void))
-NOOP_FUNC(static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector))
-NOOP_FUNC(static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector))
-NOOP_FUNC(static void noop_send_IPI_allbutself(int vector))
-NOOP_FUNC(static void noop_send_IPI_all(int vector))
-NOOP_FUNC(static void noop_send_IPI_self(int vector))
-NOOP_FUNC_RET(static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip), -1)
-NOOP_FUNC(static void noop_apic_write(u32 reg, u32 v))
-NOOP_FUNC(void noop_apic_wait_icr_idle(void))
-NOOP_FUNC_RET(static u32 noop_safe_apic_wait_icr_idle(void), 0)
-NOOP_FUNC_RET(static u64 noop_apic_icr_read(void), 0)
-NOOP_FUNC(static void noop_apic_icr_write(u32 low, u32 id))
-NOOP_FUNC_RET(static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map), phys_map)
-NOOP_FUNC_RET(static int noop_cpu_to_logical_apicid(int cpu), 1)
-NOOP_FUNC_RET(static int noop_default_phys_pkg_id(int cpuid_apic, int index_msb), 0)
-NOOP_FUNC_RET(static unsigned int noop_get_apic_id(unsigned long x), 0)
+static u64 noop_apic_icr_read(void)
+{
+	return 0;
+}
+
+static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map)
+{
+	return phys_map;
+}
+
+static int noop_cpu_to_logical_apicid(int cpu)
+{
+	return 0;
+}
+
+static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+	return 0;
+}
+
+static unsigned int noop_get_apic_id(unsigned long x)
+{
+	return 0;
+}
 
 static int noop_probe(void)
 {
-	/* should not ever be enabled this way */
+	/*
+	 * NOOP apic should not ever be
+	 * enabled via probe routine
+	 */
 	return 0;
 }
 
 static int noop_apic_id_registered(void)
 {
-	warn_apic_enabled();
-	return physid_isset(read_apic_id(), phys_cpu_present_map);
+	/*
+	 * if we would be really "pedantic"
+	 * we should pass read_apic_id() here
+	 * but since NOOP suppose APIC ID = 0
+	 * lets save a few cycles
+	 */
+	return physid_isset(0, phys_cpu_present_map);
 }
 
 static const struct cpumask *noop_target_cpus(void)
 {
-	warn_apic_enabled();
-
 	/* only BSP here */
 	return cpumask_of(0);
 }
 
 static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid)
 {
-	warn_apic_enabled();
 	return physid_isset(apicid, bitmap);
 }
 
 static unsigned long noop_check_apicid_present(int bit)
 {
-	warn_apic_enabled();
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
 static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 {
-	warn_apic_enabled();
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
 	cpumask_clear(retmask);
@@ -108,22 +120,21 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 
 int noop_apicid_to_node(int logical_apicid)
 {
-	warn_apic_enabled();
-
 	/* we're always on node 0 */
 	return 0;
 }
 
 static u32 noop_apic_read(u32 reg)
 {
-	/*
-	 * noop-read is always safe until we have
-	 * non-disabled unit
-	 */
 	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
 	return 0;
 }
 
+static void noop_apic_write(u32 reg, u32 v)
+{
+	WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+}
+
 struct apic apic_noop = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -157,7 +168,7 @@ struct apic apic_noop = {
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
 	.enable_apic_mode		= NULL,
 
-	.phys_pkg_id			= noop_default_phys_pkg_id,
+	.phys_pkg_id			= noop_phys_pkg_id,
 
 	.mps_oem_check			= NULL,
 

From 6f9b41006af1bc489030f84ee247abc0df1edccd Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <herrmann.der.user@googlemail.com>
Date: Tue, 27 Oct 2009 11:01:38 +0100
Subject: [PATCH 09/30] x86, apic: Clear APIC Timer Initial Count Register on
 shutdown

Commit a98f8fd24fb24fcb9a359553e64dd6aac5cf4279 (x86: apic reset
counter on shutdown) set the counter to max to avoid spurious
interrupts when the timer is re-enabled.

(In theory) you'll still get a spurious interrupt if spending
more than 344 seconds with this interrupt disabled and then
unmasking it.

The right thing to do is to clear the register. This disables
the interrupt from happening (at least it does on AMD hardware).

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
LKML-Reference: <20091027100138.GB30802@alberich.amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dce93d4b0ea..4c689f45b23 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -444,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 		v = apic_read(APIC_LVTT);
 		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 		apic_write(APIC_LVTT, v);
-		apic_write(APIC_TMICT, 0xffffffff);
+		apic_write(APIC_TMICT, 0);
 		break;
 	case CLOCK_EVT_MODE_RESUME:
 		/* Nothing to do here */

From 7a7732bc0f7c46f217dbec723f25366b6285cc42 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 26 Oct 2009 14:24:31 -0800
Subject: [PATCH 10/30] x86: Unify fixup_irqs() for 32-bit and 64-bit kernels

There is no reason to have different fixup_irqs() for 32-bit and
64-bit kernels. Unify by using the superior 64-bit version for
both the kernels.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <20091026230001.562512739@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq.c    | 59 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/irq_32.c | 45 ------------------------------
 arch/x86/kernel/irq_64.c | 58 ---------------------------------------
 3 files changed, 59 insertions(+), 103 deletions(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 39120619951..3ea66556e5e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -276,3 +276,62 @@ void smp_generic_interrupt(struct pt_regs *regs)
 }
 
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
+void fixup_irqs(void)
+{
+	unsigned int irq;
+	static int warned;
+	struct irq_desc *desc;
+
+	for_each_irq_desc(irq, desc) {
+		int break_affinity = 0;
+		int set_affinity = 1;
+		const struct cpumask *affinity;
+
+		if (!desc)
+			continue;
+		if (irq == 2)
+			continue;
+
+		/* interrupt's are disabled at this point */
+		spin_lock(&desc->lock);
+
+		affinity = desc->affinity;
+		if (!irq_has_action(irq) ||
+		    cpumask_equal(affinity, cpu_online_mask)) {
+			spin_unlock(&desc->lock);
+			continue;
+		}
+
+		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+			break_affinity = 1;
+			affinity = cpu_all_mask;
+		}
+
+		if (desc->chip->mask)
+			desc->chip->mask(irq);
+
+		if (desc->chip->set_affinity)
+			desc->chip->set_affinity(irq, affinity);
+		else if (!(warned++))
+			set_affinity = 0;
+
+		if (desc->chip->unmask)
+			desc->chip->unmask(irq);
+
+		spin_unlock(&desc->lock);
+
+		if (break_affinity && set_affinity)
+			printk("Broke affinity for irq %i\n", irq);
+		else if (!set_affinity)
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+	/* That doesn't seem sufficient.  Give it 1ms. */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+}
+#endif
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 7d35d0fe232..10709f29d16 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
 
 	return true;
 }
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
-void fixup_irqs(void)
-{
-	unsigned int irq;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(irq, desc) {
-		const struct cpumask *affinity;
-
-		if (!desc)
-			continue;
-		if (irq == 2)
-			continue;
-
-		affinity = desc->affinity;
-		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
-			printk("Breaking affinity for irq %i\n", irq);
-			affinity = cpu_all_mask;
-		}
-		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, affinity);
-		else if (desc->action)
-			printk_once("Cannot set affinity for irq %i\n", irq);
-	}
-
-#if 0
-	barrier();
-	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
-	   [note the nop - the interrupt-enable boundary on x86 is two
-	   instructions from sti] - to flush out pending hardirqs and
-	   IPIs. After this point nothing is supposed to reach this CPU." */
-	__asm__ __volatile__("sti; nop; cli");
-	barrier();
-#else
-	/* That doesn't seem sufficient.  Give it 1ms. */
-	local_irq_enable();
-	mdelay(1);
-	local_irq_disable();
-#endif
-}
-#endif
-
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 977d8b43a0d..acf8fbf8fbd 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
 	return true;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-/* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
-void fixup_irqs(void)
-{
-	unsigned int irq;
-	static int warned;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(irq, desc) {
-		int break_affinity = 0;
-		int set_affinity = 1;
-		const struct cpumask *affinity;
-
-		if (!desc)
-			continue;
-		if (irq == 2)
-			continue;
-
-		/* interrupt's are disabled at this point */
-		spin_lock(&desc->lock);
-
-		affinity = desc->affinity;
-		if (!irq_has_action(irq) ||
-		    cpumask_equal(affinity, cpu_online_mask)) {
-			spin_unlock(&desc->lock);
-			continue;
-		}
-
-		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
-			break_affinity = 1;
-			affinity = cpu_all_mask;
-		}
-
-		if (desc->chip->mask)
-			desc->chip->mask(irq);
-
-		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, affinity);
-		else if (!(warned++))
-			set_affinity = 0;
-
-		if (desc->chip->unmask)
-			desc->chip->unmask(irq);
-
-		spin_unlock(&desc->lock);
-
-		if (break_affinity && set_affinity)
-			printk("Broke affinity for irq %i\n", irq);
-		else if (!set_affinity)
-			printk("Cannot set affinity for irq %i\n", irq);
-	}
-
-	/* That doesn't seem sufficient.  Give it 1ms. */
-	local_irq_enable();
-	mdelay(1);
-	local_irq_disable();
-}
-#endif
 
 extern void call_softirq(void);
 

From 84e21493a3b28c9fefe99fe827fc0c0c101a813d Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 26 Oct 2009 14:24:32 -0800
Subject: [PATCH 11/30] x86, intr-remap: Avoid irq_chip mask/unmask in
 fixup_irqs() for intr-remapping

In the presence of interrupt-remapping, irqs will be migrated in
the process context and we don't do (and there is no need to)
irq_chip mask/unmask while migrating the interrupt.

Similarly fix the fixup_irqs() that get called during cpu
offline and avoid calling irq_chip mask/unmask for irqs that are
ok to be migrated in the process context.

While we didn't observe any race condition with the existing
code, this change takes complete advantage of
interrupt-remapping in the newer generation platforms and avoids
any potential HW lockup's (that often worry Eric :)

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: garyhade@us.ibm.com
LKML-Reference: <20091026230001.661423939@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3ea66556e5e..342bcbca19b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -310,7 +310,7 @@ void fixup_irqs(void)
 			affinity = cpu_all_mask;
 		}
 
-		if (desc->chip->mask)
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
@@ -318,7 +318,7 @@ void fixup_irqs(void)
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (desc->chip->unmask)
+		if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
 			desc->chip->unmask(irq);
 
 		spin_unlock(&desc->lock);

From 23359a88e7eca3c4f402562b102f23014db3c2aa Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 26 Oct 2009 14:24:33 -0800
Subject: [PATCH 12/30] x86: Remove move_cleanup_count from irq_cfg

move_cleanup_count for each irq in irq_cfg is keeping track of
the total number of cpus that need to free the corresponding
vectors associated with the irq which has now been migrated to
new destination. As long as this move_cleanup_count is non-zero
(i.e., as long as we have n't freed the vector allocations on
the old destinations) we were preventing the irq's further
migration.

This cleanup count is unnecessary and it is enough to not allow
the irq migration till we send the cleanup vector to the
previous irq destination, for which we already have irq_cfg's
move_in_progress.  All we need to make sure is that we free the
vector at the old desintation but we don't need to wait till
that gets freed.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Gary Hade <garyhade@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <20091026230001.752968906@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/hw_irq.h  | 1 -
 arch/x86/kernel/apic/io_apic.c | 9 +--------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1984ce9a13d..6e124269fd4 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -94,7 +94,6 @@ struct irq_cfg {
 	struct irq_pin_list	*irq_2_pin;
 	cpumask_var_t		domain;
 	cpumask_var_t		old_domain;
-	unsigned		move_cleanup_count;
 	u8			vector;
 	u8			move_in_progress : 1;
 };
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ce16b65cfdc..e9e5b02c3af 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1161,7 +1161,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 	int cpu, err;
 	cpumask_var_t tmp_mask;
 
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+	if (cfg->move_in_progress)
 		return -EBUSY;
 
 	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -2234,14 +2234,10 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 
 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
 		unsigned int i;
-		cfg->move_cleanup_count = 0;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			cfg->move_cleanup_count++;
 		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
 			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
 	} else {
 		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		free_cpumask_var(cleanup_mask);
 	}
@@ -2430,8 +2426,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 
 		cfg = irq_cfg(irq);
 		spin_lock(&desc->lock);
-		if (!cfg->move_cleanup_count)
-			goto unlock;
 
 		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 			goto unlock;
@@ -2449,7 +2443,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 		}
 		__get_cpu_var(vector_irq)[vector] = -1;
-		cfg->move_cleanup_count--;
 unlock:
 		spin_unlock(&desc->lock);
 	}

From a5e74b841930bec78a4684ab9f208b2ddfe7c736 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 26 Oct 2009 14:24:34 -0800
Subject: [PATCH 13/30] x86: Force irq complete move during cpu offline

When a cpu goes offline, fixup_irqs() try to move irq's
currently destined to the offline cpu to a new cpu. But this
attempt will fail if the irq is recently moved to this cpu and
the irq still hasn't arrived at this cpu (for non intr-remapping
platforms this is when we free the vector allocation at the
previous destination) that is about to go offline.

This will endup with the interrupt subsystem still pointing the
irq to the offline cpu, causing that irq to not work any more.

Fix this by forcing the irq to complete its move (its been a
long time we moved the irq to this cpu which we are offlining
now) and then move this irq to a new cpu before this cpu goes
offline.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Gary Hade <garyhade@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <20091026230001.848830905@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq.h     |  1 +
 arch/x86/kernel/apic/io_apic.c | 18 +++++++++++++++---
 arch/x86/kernel/irq.c          |  7 +++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index ddda6cbed6f..ffd700ff5dc 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq)
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern void fixup_irqs(void);
+extern void irq_force_complete_move(int);
 #endif
 
 extern void (*generic_interrupt_extension)(void);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e9e5b02c3af..4e886efd9a1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2450,21 +2450,33 @@ unlock:
 	irq_exit();
 }
 
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
 {
 	struct irq_desc *desc = *descp;
 	struct irq_cfg *cfg = desc->chip_data;
-	unsigned vector, me;
+	unsigned me;
 
 	if (likely(!cfg->move_in_progress))
 		return;
 
-	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
 	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
 }
+
+static void irq_complete_move(struct irq_desc **descp)
+{
+	__irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+
+	__irq_complete_move(&desc, cfg->vector);
+}
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 342bcbca19b..b10a5e1da06 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -305,6 +305,13 @@ void fixup_irqs(void)
 			continue;
 		}
 
+		/*
+		 * Complete the irq move. This cpu is going down and for
+		 * non intr-remapping case, we can't wait till this interrupt
+		 * arrives at this cpu before completing the irq move.
+		 */
+		irq_force_complete_move(irq);
+
 		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
 			affinity = cpu_all_mask;

From b3ec0a37a7907813bb4fb85a2d94102c152470b7 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 26 Oct 2009 14:24:35 -0800
Subject: [PATCH 14/30] x86: Use EOI register in io-apic on intel platforms

IO-APIC's in intel chipsets support EOI register starting from
IO-APIC version 2. Use that when ever we need to clear the
IO-APIC RTE's RemoteIRR bit explicitly.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Gary Hade <garyhade@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <20091026230001.947855317@sbs-t61.sc.intel.com>
[ Marked use_eio_reg as __read_mostly, fixed small details ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 81 ++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4e886efd9a1..31e9db3c12a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2492,6 +2492,51 @@ static void ack_apic_edge(unsigned int irq)
 
 atomic_t irq_mis_count;
 
+static int use_eoi_reg __read_mostly;
+
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		if (irq_remapped(irq))
+			io_apic_eoi(entry->apic, entry->pin);
+		else
+			io_apic_eoi(entry->apic, cfg->vector);
+	}
+}
+
+static void eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static int ioapic_supports_eoi(void)
+{
+	struct pci_dev *root;
+
+	root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+	if (root && root->vendor == PCI_VENDOR_ID_INTEL &&
+	    mp_ioapics[0].apicver >= 0x2) {
+		use_eoi_reg = 1;
+		printk(KERN_INFO "IO-APIC supports EOI register\n");
+	} else
+		printk(KERN_INFO "IO-APIC doesn't support EOI\n");
+
+	return 0;
+}
+
+fs_initcall(ioapic_supports_eoi);
+
 static void ack_apic_level(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2575,37 +2620,19 @@ static void ack_apic_level(unsigned int irq)
 	/* Tail end of version 0x11 I/O APIC bug workaround */
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
-		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(cfg);
-		__unmask_and_level_IO_APIC_irq(cfg);
-		spin_unlock(&ioapic_lock);
+
+		if (use_eoi_reg)
+			eoi_ioapic_irq(desc);
+		else {
+			spin_lock(&ioapic_lock);
+			__mask_and_edge_IO_APIC_irq(cfg);
+			__unmask_and_level_IO_APIC_irq(cfg);
+			spin_unlock(&ioapic_lock);
+		}
 	}
 }
 
 #ifdef CONFIG_INTR_REMAP
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	struct irq_pin_list *entry;
-
-	for_each_irq_pin(entry, cfg->irq_2_pin)
-		io_apic_eoi(entry->apic, entry->pin);
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 static void ir_ack_apic_edge(unsigned int irq)
 {
 	ack_APIC_irq();

From 5231a68614b94f60e8f6c56bc6e3d75955b9e75e Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 26 Oct 2009 14:24:36 -0800
Subject: [PATCH 15/30] x86: Remove local_irq_enable()/local_irq_disable() in
 fixup_irqs()

To ensure that we handle all the pending interrupts (destined
for this cpu that is going down) in the interrupt subsystem
before the cpu goes offline, fixup_irqs() does:

	local_irq_enable();
	mdelay(1);
	local_irq_disable();

Enabling interrupts is not a good thing as this cpu is already
offline. So this patch replaces that logic with,

	mdelay(1);
	check APIC_IRR bits
	Retrigger the irq at the new destination if any interrupt has arrived
	via IPI.

For IO-APIC level triggered interrupts, this retrigger IPI will
appear as an edge interrupt. ack_apic_level() will detect this
condition and IO-APIC RTE's remoteIRR is cleared using directed
EOI(using IO-APIC EOI register) on Intel platforms and for
others it uses the existing mask+edge logic followed by
unmask+level.

We can also remove mdelay() and then send spuriuous interrupts
to new cpu targets for all the irqs that were handled previously
by this cpu that is going offline. While it works, I have seen
spurious interrupt messages (nothing wrong but still annoying
messages during cpu offline, which can be seen during
suspend/resume etc)

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Gary Hade <garyhade@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <20091026230002.043281924@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/irq.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b10a5e1da06..8a82728d47c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -281,7 +281,7 @@ EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
 void fixup_irqs(void)
 {
-	unsigned int irq;
+	unsigned int irq, vector;
 	static int warned;
 	struct irq_desc *desc;
 
@@ -336,9 +336,33 @@ void fixup_irqs(void)
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
 
-	/* That doesn't seem sufficient.  Give it 1ms. */
-	local_irq_enable();
+	/*
+	 * We can remove mdelay() and then send spuriuous interrupts to
+	 * new cpu targets for all the irqs that were handled previously by
+	 * this cpu. While it works, I have seen spurious interrupt messages
+	 * (nothing wrong but still...).
+	 *
+	 * So for now, retain mdelay(1) and check the IRR and then send those
+	 * interrupts to new targets as this cpu is already offlined...
+	 */
 	mdelay(1);
-	local_irq_disable();
+
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		unsigned int irr;
+
+		if (__get_cpu_var(vector_irq)[vector] < 0)
+			continue;
+
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		if (irr  & (1 << (vector % 32))) {
+			irq = __get_cpu_var(vector_irq)[vector];
+
+			desc = irq_to_desc(irq);
+			spin_lock(&desc->lock);
+			if (desc->chip->retrigger)
+				desc->chip->retrigger(irq);
+			spin_unlock(&desc->lock);
+		}
+	}
 }
 #endif

From 46dc281b1bb02527195fe2ad50a3af6d7f7f7325 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 8 Nov 2009 18:53:56 +0300
Subject: [PATCH 16/30] x86, apic: Use PAGE_SIZE instead of numbers

The whole page is reserved for IO-APIC fixmap
due to non-cacheable requirement. So lets note
this explicitly instead of playing with numbers.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
LKML-Reference: <20091108155356.GB25940@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 31e9db3c12a..9ee1c1628c1 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4111,7 +4111,7 @@ fake_ioapic_page:
 		idx++;
 
 		ioapic_res->start = ioapic_phys;
-		ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+		ioapic_res->end = ioapic_phys + PAGE_SIZE-1;
 		ioapic_res++;
 	}
 }

From 4343fe1024e09e17667f95620ed3e69a7a5f4389 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 8 Nov 2009 18:54:31 +0300
Subject: [PATCH 17/30] x86, ioapic: Use snrpintf while set names for IO-APIC
 resourses

We should be ready that one day MAX_IO_APICS may raise its
number. To prevent memory overwrite we're to use safe
snprintf while set IO-APIC resourse name.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <20091108155431.GC25940@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9ee1c1628c1..24d1458a182 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4066,7 +4066,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 	for (i = 0; i < nr_ioapics; i++) {
 		res[i].name = mem;
 		res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		sprintf(mem,  "IOAPIC %u", i);
+		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
 	}
 

From f4a70c55376683213229af7266dc57ad81aee354 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 8 Nov 2009 16:16:45 +0300
Subject: [PATCH 18/30] x86, apic: Get rid of apicid_to_cpu_present assign on
 64-bit

In fact it's never get used on x86-64 (for 64 bit platform
we use differ technique to enumerate io-units).

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <20091108131645.GD5300@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic_noop.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 9ab6ffb313a..89629f622b6 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -162,7 +162,12 @@ struct apic apic_noop = {
 
 	.cpu_to_logical_apicid		= noop_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+
+#ifdef CONFIG_X86_32
 	.apicid_to_cpu_present		= default_apicid_to_cpu_present,
+#else
+	.apicid_to_cpu_present		= NULL,
+#endif
 
 	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,

From 7abc07531383ac7f727cc9d44e1360a829f2082e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 10 Nov 2009 01:06:59 +0300
Subject: [PATCH 19/30] x86: apic: Do not use stacked physid_mask_t

We should not use physid_mask_t as a stack based
variable in apic code. This type depends on MAX_APICS
parameter which may be huge enough.

Especially it became a problem with apic NOOP driver which
is portable between 32 bit and 64 bit environment
(where we have really huge MAX_APICS).

So apic driver should operate with pointers and a caller
in turn should aware of allocation physid_mask_t variable.

As a side (but positive) effect -- we may use already
implemented physid_set_mask_of_physid function eliminating
default_apicid_to_cpu_present completely.

Note that physids_coerce and physids_promote turned into static
inline from macro (since macro hides the fact that parameter is
being interpreted as unsigned long, make it explicit).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
LKML-Reference: <20091109220659.GA5568@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apic.h      | 19 +++++++------------
 arch/x86/include/asm/mpspec.h    | 16 +++++++++-------
 arch/x86/kernel/apic/apic_noop.c | 18 ++++--------------
 arch/x86/kernel/apic/bigsmp_32.c | 13 ++++---------
 arch/x86/kernel/apic/es7000_32.c | 16 ++++++----------
 arch/x86/kernel/apic/io_apic.c   | 14 +++++++-------
 arch/x86/kernel/apic/numaq_32.c  | 13 ++++++-------
 arch/x86/kernel/apic/probe_32.c  |  2 +-
 arch/x86/kernel/apic/summit_32.c | 10 +++++-----
 arch/x86/kernel/visws_quirks.c   |  2 +-
 10 files changed, 50 insertions(+), 73 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 08a5f420e07..b4ac2cdcb64 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -297,20 +297,20 @@ struct apic {
 	int disable_esr;
 
 	int dest_logical;
-	unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid);
+	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
 	unsigned long (*check_apicid_present)(int apicid);
 
 	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
 	void (*init_apic_ldr)(void);
 
-	physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map);
+	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
 
 	void (*setup_apic_routing)(void);
 	int (*multi_timer_check)(int apic, int irq);
 	int (*apicid_to_node)(int logical_apicid);
 	int (*cpu_to_logical_apicid)(int cpu);
 	int (*cpu_present_to_apicid)(int mps_cpu);
-	physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
+	void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
 	void (*setup_portio_remap)(void);
 	int (*check_phys_apicid_present)(int phys_apicid);
 	void (*enable_apic_mode)(void);
@@ -534,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	return (unsigned int)(mask1 & mask2 & mask3);
 }
 
-static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid)
+static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
 {
-	return physid_isset(apicid, bitmap);
+	return physid_isset(apicid, *map);
 }
 
 static inline unsigned long default_check_apicid_present(int bit)
@@ -544,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
-static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map)
+static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
-	return phys_map;
+	*retmap = *phys_map;
 }
 
 /* Mapping from cpu number to logical apicid */
@@ -585,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu);
 extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
-static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid)
-{
-	return physid_mask_of_physid(phys_apicid);
-}
-
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 79c94500c0b..61d90b1331c 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t;
 #define physids_shift_left(d, s, n)				\
 	bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
 
-#define physids_coerce(map)			((map).mask[0])
+static inline unsigned long physids_coerce(physid_mask_t *map)
+{
+	return map->mask[0];
+}
 
-#define physids_promote(physids)					\
-	({								\
-		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
-		__physid_mask.mask[0] = physids;			\
-		__physid_mask;						\
-	})
+static inline void physids_promote(unsigned long physids, physid_mask_t *map)
+{
+	physids_clear(*map);
+	map->mask[0] = physids;
+}
 
 /* Note: will create very large stack frames if physid_mask_t is big */
 #define physid_mask_of_physid(physid)					\
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 89629f622b6..d9acc3bee0f 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
 	return 0;
 }
 
-static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map)
-{
-	return phys_map;
-}
-
 static int noop_cpu_to_logical_apicid(int cpu)
 {
 	return 0;
@@ -100,9 +95,9 @@ static const struct cpumask *noop_target_cpus(void)
 	return cpumask_of(0);
 }
 
-static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid)
 {
-	return physid_isset(apicid, bitmap);
+	return physid_isset(apicid, *map);
 }
 
 static unsigned long noop_check_apicid_present(int bit)
@@ -155,19 +150,14 @@ struct apic apic_noop = {
 	.vector_allocation_domain	= noop_vector_allocation_domain,
 	.init_apic_ldr			= noop_init_apic_ldr,
 
-	.ioapic_phys_id_map		= noop_ioapic_phys_id_map,
+	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= noop_apicid_to_node,
 
 	.cpu_to_logical_apicid		= noop_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
-
-#ifdef CONFIG_X86_32
-	.apicid_to_cpu_present		= default_apicid_to_cpu_present,
-#else
-	.apicid_to_cpu_present		= NULL,
-#endif
+	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 
 	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 77a06413b6b..38dcecfa581 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void)
 #endif
 }
 
-static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return 0;
 }
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 	return BAD_APICID;
 }
 
-static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
-{
-	return physid_mask_of_physid(phys_apicid);
-}
-
 /* Mapping from cpu number to logical apicid */
 static inline int bigsmp_cpu_to_logical_apicid(int cpu)
 {
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu)
 	return cpu_physical_id(cpu);
 }
 
-static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
+static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
-	return physids_promote(0xFFL);
+	physids_promote(0xFFL, retmap);
 }
 
 static int bigsmp_check_phys_apicid_present(int phys_apicid)
@@ -230,7 +225,7 @@ struct apic apic_bigsmp = {
 	.apicid_to_node			= bigsmp_apicid_to_node,
 	.cpu_to_logical_apicid		= bigsmp_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
-	.apicid_to_cpu_present		= bigsmp_apicid_to_cpu_present,
+	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= bigsmp_check_phys_apicid_present,
 	.enable_apic_mode		= NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 89174f847b4..e85f8fb7f8e 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void)
 	return cpumask_of(smp_processor_id());
 }
 
-static unsigned long
-es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return 0;
 }
+
 static unsigned long es7000_check_apicid_present(int bit)
 {
 	return physid_isset(bit, phys_cpu_present_map);
@@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu)
 
 static int cpu_id;
 
-static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
+static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
 {
-	physid_mask_t mask;
-
-	mask = physid_mask_of_physid(cpu_id);
+	physid_set_mask_of_physid(cpu_id, retmap);
 	++cpu_id;
-
-	return mask;
 }
 
 /* Mapping from cpu number to logical apicid */
@@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu)
 #endif
 }
 
-static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
+static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
-	return physids_promote(0xff);
+	physids_promote(0xFFL, retmap);
 }
 
 static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 24d1458a182..20ea8392bc5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2031,7 +2031,7 @@ void __init setup_ioapic_ids_from_mpc(void)
 	 * This is broken; anything with a real cpu count has to
 	 * circumvent this idiocy regardless.
 	 */
-	phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
 
 	/*
 	 * Set the IOAPIC ID to the value stored in the MPC table.
@@ -2058,7 +2058,7 @@ void __init setup_ioapic_ids_from_mpc(void)
 		 * system must have a unique ID or we get lots of nice
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
-		if (apic->check_apicid_used(phys_id_present_map,
+		if (apic->check_apicid_used(&phys_id_present_map,
 					mp_ioapics[apic_id].apicid)) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
 				apic_id, mp_ioapics[apic_id].apicid);
@@ -2073,7 +2073,7 @@ void __init setup_ioapic_ids_from_mpc(void)
 			mp_ioapics[apic_id].apicid = i;
 		} else {
 			physid_mask_t tmp;
-			tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+			apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
 					mp_ioapics[apic_id].apicid);
@@ -3904,7 +3904,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 	 */
 
 	if (physids_empty(apic_id_map))
-		apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+		apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	reg_00.raw = io_apic_read(ioapic, 0);
@@ -3920,10 +3920,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 	 * Every APIC in a system must have a unique ID or we get lots of nice
 	 * 'stuck on smp_invalidate_needed IPI wait' messages.
 	 */
-	if (apic->check_apicid_used(apic_id_map, apic_id)) {
+	if (apic->check_apicid_used(&apic_id_map, apic_id)) {
 
 		for (i = 0; i < get_physical_broadcast(); i++) {
-			if (!apic->check_apicid_used(apic_id_map, i))
+			if (!apic->check_apicid_used(&apic_id_map, i))
 				break;
 		}
 
@@ -3936,7 +3936,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 		apic_id = i;
 	}
 
-	tmp = apic->apicid_to_cpu_present(apic_id);
+	apic->apicid_to_cpu_present(apic_id, &tmp);
 	physids_or(apic_id_map, apic_id_map, tmp);
 
 	if (reg_00.bits.ID != apic_id) {
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index efa00e2b850..07cdbdcd7a9 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void)
 	return cpu_all_mask;
 }
 
-static inline unsigned long
-numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
 {
-	return physid_isset(apicid, bitmap);
+	return physid_isset(apicid, *map);
 }
 
 static inline unsigned long numaq_check_apicid_present(int bit)
@@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq)
 	return apic != 0 && irq == 0;
 }
 
-static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
+static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* We don't have a good way to do this yet - hack */
-	return physids_promote(0xFUL);
+	return physids_promote(0xFUL, retmap);
 }
 
 static inline int numaq_cpu_to_logical_apicid(int cpu)
@@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid)
 	return logical_apicid >> 4;
 }
 
-static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
+static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
 {
 	int node = numaq_apicid_to_node(logical_apicid);
 	int cpu = __ffs(logical_apicid & 0xf);
 
-	return physid_mask_of_physid(cpu + 4*node);
+	physid_set_mask_of_physid(cpu + 4*node, retmap);
 }
 
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0c0182cc947..1a6559f6768 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -108,7 +108,7 @@ struct apic apic_default = {
 	.apicid_to_node			= default_apicid_to_node,
 	.cpu_to_logical_apicid		= default_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
-	.apicid_to_cpu_present		= default_apicid_to_cpu_present,
+	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
 	.enable_apic_mode		= NULL,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 645ecc4ff0b..9b419263d90 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void)
 	return cpumask_of(0);
 }
 
-static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
+static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
 {
 	return 0;
 }
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu)
 		return BAD_APICID;
 }
 
-static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
+static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
-	return physids_promote(0x0F);
+	physids_promote(0x0FL, retmap);
 }
 
-static physid_mask_t summit_apicid_to_cpu_present(int apicid)
+static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
 {
-	return physid_mask_of_physid(0);
+	physid_set_mask_of_physid(0, retmap);
 }
 
 static int summit_check_phys_apicid_present(int physical_apicid)
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index f068553a1b1..cff70c86e18 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 		return;
 	}
 
-	apic_cpus = apic->apicid_to_cpu_present(m->apicid);
+	apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
 	physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
 	/*
 	 * Validate version

From 200a9ae2801bc725f2c41ab13f6e0fb1610d2fb6 Mon Sep 17 00:00:00 2001
From: Dimitri Sivanich <sivanich@sgi.com>
Date: Tue, 10 Nov 2009 13:58:35 -0600
Subject: [PATCH 20/30] x86: Remove asm/apicnum.h

arch/x86/include/asm/apicnum.h is not referenced anywhere
anymore. Its definitions appear in apicdef.h. Remove it.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Mike Travis <travis@sgi.com>
LKML-Reference: <20091110195835.GA4393@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apicnum.h | 12 ------------
 1 file changed, 12 deletions(-)
 delete mode 100644 arch/x86/include/asm/apicnum.h

diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h
deleted file mode 100644
index 82f613c607c..00000000000
--- a/arch/x86/include/asm/apicnum.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_X86_APICNUM_H
-#define _ASM_X86_APICNUM_H
-
-/* define MAX_IO_APICS */
-#ifdef CONFIG_X86_32
-# define MAX_IO_APICS 64
-#else
-# define MAX_IO_APICS 128
-# define MAX_LOCAL_APIC 32768
-#endif
-
-#endif /* _ASM_X86_APICNUM_H */

From e79c65a97c01d5da4317f44f9f98b3814e091a43 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Mon, 16 Nov 2009 18:14:26 +0300
Subject: [PATCH 21/30] x86: io-apic: IO-APIC MMIO should not fail on resource
 insertion

If IO-APIC base address is 1K aligned we should not fail
on resourse insertion procedure. For this sake we define
IO_APIC_SLOT_SIZE constant which should cover all IO-APIC
direct accessible registers.

An example of a such configuration is there

	http://marc.info/?l=linux-kernel&m=118114792006520

 |
 | Quoting the message
 |
 | IOAPIC[0]: apic_id 2, version 32, address 0xfec00000, GSI 0-23
 | IOAPIC[1]: apic_id 3, version 32, address 0xfec80000, GSI 24-47
 | IOAPIC[2]: apic_id 4, version 32, address 0xfec80400, GSI 48-71
 | IOAPIC[3]: apic_id 5, version 32, address 0xfec84000, GSI 72-95
 | IOAPIC[4]: apic_id 8, version 32, address 0xfec84400, GSI 96-119
 |

Reported-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <20091116151426.GC5653@lenovo>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apicdef.h |  6 ++++++
 arch/x86/kernel/apic/io_apic.c | 11 +++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 3b62da926de..7fe3b3060f0 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -11,6 +11,12 @@
 #define IO_APIC_DEFAULT_PHYS_BASE	0xfec00000
 #define	APIC_DEFAULT_PHYS_BASE		0xfee00000
 
+/*
+ * This is the IO-APIC register space as specified
+ * by Intel docs:
+ */
+#define IO_APIC_SLOT_SIZE		1024
+
 #define	APIC_ID		0x20
 
 #define	APIC_LVR	0x30
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 20ea8392bc5..ff237199fa2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4100,18 +4100,17 @@ void __init ioapic_init_mappings(void)
 #ifdef CONFIG_X86_32
 fake_ioapic_page:
 #endif
-			ioapic_phys = (unsigned long)
-				alloc_bootmem_pages(PAGE_SIZE);
+			ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
 			ioapic_phys = __pa(ioapic_phys);
 		}
 		set_fixmap_nocache(idx, ioapic_phys);
-		apic_printk(APIC_VERBOSE,
-			    "mapped IOAPIC to %08lx (%08lx)\n",
-			    __fix_to_virt(idx), ioapic_phys);
+		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+			ioapic_phys);
 		idx++;
 
 		ioapic_res->start = ioapic_phys;
-		ioapic_res->end = ioapic_phys + PAGE_SIZE-1;
+		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
 		ioapic_res++;
 	}
 }

From e670761f12f4069d204f433bf547d9c679a4fd05 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 21 Nov 2009 00:23:37 -0800
Subject: [PATCH 22/30] x86: apic: Remove not needed #ifdef

Suresh made dmar_table_init() already have that protection.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B07A739.3030104@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4c689f45b23..ad8c75b9e45 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1377,14 +1377,11 @@ void __init enable_IR_x2apic(void)
 	unsigned long flags;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 	int ret, x2apic_enabled = 0;
-	int dmar_table_init_ret = 0;
+	int dmar_table_init_ret;
 
-#ifdef CONFIG_INTR_REMAP
 	dmar_table_init_ret = dmar_table_init();
-	if (dmar_table_init_ret)
-		pr_debug("dmar_table_init() failed with %d:\n",
-				dmar_table_init_ret);
-#endif
+	if (dmar_table_init_ret && !x2apic_supported())
+		return;
 
 	ioapic_entries = alloc_ioapic_entries();
 	if (!ioapic_entries) {

From 37ef2a3029fde884808ff1b369677abc7dd9a79a Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 21 Nov 2009 00:23:37 -0800
Subject: [PATCH 23/30] x86: Re-get cfg_new in case reuse/move irq_desc

When irq_desc is moved, we need to make sure to use the right cfg_new.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B07A739.3030104@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ff237199fa2..085e60e303c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3186,6 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
 			continue;
 
 		desc_new = move_irq_desc(desc_new, node);
+		cfg_new = desc_new->chip_data;
 
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;

From 163d3866cfa79aa5945f1ee5e43fb3ed1455f75c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 21 Nov 2009 00:23:37 -0800
Subject: [PATCH 24/30] x86: apic: Print out SRAT table APIC id in hex

Make it consistent with APIC MADT print out,
for big systems APIC id in hex is more readable.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B07A739.3030104@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/srat_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index dbb5381f7b3..9d7ce96e5a5 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
-	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
 	       pxm, apic_id, node);
 }
 
@@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
-	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
 	       pxm, apic_id, node);
 }
 

From 918bc960dc630b1a79c0d2991a81985812ff69f5 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Wed, 25 Nov 2009 10:20:19 -0600
Subject: [PATCH 25/30] x86: SGI UV: Map low MMR ranges

Explicitly mmap the UV chipset MMR address ranges used to
access blade-local registers. Although these same MMRs are also
mmaped at higher addresses, the low range is more
convenient when accessing blade-local registers.

The low range addresses always alias to the local blade
regardless of the blade id.

Signed-off-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <20091125162018.GA25445@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5f5886a6b5..6d425490fb1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode)
 		map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
 }
 
+static __init void map_low_mmrs(void)
+{
+	init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+	init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
 static __init void uv_rtc_init(void)
 {
 	long status;
@@ -550,6 +556,8 @@ void __init uv_system_init(void)
 	unsigned long mmr_base, present, paddr;
 	unsigned short pnode_mask;
 
+	map_low_mmrs();
+
 	m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
 	m_val = m_n_config.s.m_skt;
 	n_val = m_n_config.s.n_skt;

From ca64c47cecd0321b2e0dcbd7aaff44b68ce20654 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Tue, 1 Dec 2009 15:31:15 -0800
Subject: [PATCH 26/30] x86, io-apic: Move the effort of clearing remoteIRR
 explicitly before migrating the irq

When the level-triggered interrupt is seen as an edge interrupt,
we try to clear the remoteIRR explicitly (using either an
io-apic eoi register when present or through the idea of
changing trigger mode of the io-apic RTE to edge and then back
to level). But this explicit try also needs to happen before we
try to migrate the irq. Otherwise irq migration attempt will
fail anyhow, as it postpones the irq migration to a later
attempt when it sees the remoteIRR in the io-apic RTE still set.

Signed-off-by: "Maciej W. Rozycki" <macro@linux-mips.org>
Reviewed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: ebiederm@xmission.com
Cc: garyhade@us.ibm.com
LKML-Reference: <20091201233334.975416130@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 085e60e303c..b377b973899 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2583,6 +2583,20 @@ static void ack_apic_level(unsigned int irq)
 	 */
 	ack_APIC_irq();
 
+	/* Tail end of version 0x11 I/O APIC bug workaround */
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+
+		if (use_eoi_reg)
+			eoi_ioapic_irq(desc);
+		else {
+			spin_lock(&ioapic_lock);
+			__mask_and_edge_IO_APIC_irq(cfg);
+			__unmask_and_level_IO_APIC_irq(cfg);
+			spin_unlock(&ioapic_lock);
+		}
+	}
+
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2616,20 +2630,6 @@ static void ack_apic_level(unsigned int irq)
 			move_masked_irq(irq);
 		unmask_IO_APIC_irq_desc(desc);
 	}
-
-	/* Tail end of version 0x11 I/O APIC bug workaround */
-	if (!(v & (1 << (i & 0x1f)))) {
-		atomic_inc(&irq_mis_count);
-
-		if (use_eoi_reg)
-			eoi_ioapic_irq(desc);
-		else {
-			spin_lock(&ioapic_lock);
-			__mask_and_edge_IO_APIC_irq(cfg);
-			__unmask_and_level_IO_APIC_irq(cfg);
-			spin_unlock(&ioapic_lock);
-		}
-	}
 }
 
 #ifdef CONFIG_INTR_REMAP

From c29d9db338db606c3335a03f337e1d4b7f6bb727 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 1 Dec 2009 15:31:16 -0800
Subject: [PATCH 27/30] x86, ioapic: Fix the EOI register detection mechanism

Maciej W. Rozycki reported:

> 82093AA I/O APIC has its version set to 0x11 and it
> does not support the EOI register.  Similarly I/O APICs
> integrated into the 82379AB south bridge and the 82374EB/SB
> EISA component.

IO-APIC versions below 0x20 don't support EOI register.

Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
version as 0x2. This is an error with documentation and these
ICH chips use io-apic's of version 0x20 and indeed has a working
EOI register for the io-apic.

Fix the EOI register detection mechanism to check for version
0x20 and beyond.

And also, a platform can potentially  have io-apic's with
different versions. Make the EOI register check per io-apic.

Reported-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: ebiederm@xmission.com
Cc: garyhade@us.ibm.com
LKML-Reference: <20091201233335.065361533@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 115 +++++++++++++++++----------------
 1 file changed, 61 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b377b973899..78960a3b0ed 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -539,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 	add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+				 int mask_and, int mask_or,
+				 void (*final)(struct irq_pin_list *entry))
+{
+	unsigned int reg, pin;
+
+	pin = entry->pin;
+	reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+	reg &= mask_and;
+	reg |= mask_or;
+	io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+	if (final)
+		final(entry);
+}
+
 static void io_apic_modify_irq(struct irq_cfg *cfg,
 			       int mask_and, int mask_or,
 			       void (*final)(struct irq_pin_list *entry))
 {
-	int pin;
 	struct irq_pin_list *entry;
 
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		unsigned int reg;
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
-		reg &= mask_and;
-		reg |= mask_or;
-		io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
-		if (final)
-			final(entry);
-	}
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		__io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+			     IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 
 static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -579,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-			IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
-			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
 	struct irq_cfg *cfg = desc->chip_data;
@@ -2492,17 +2498,42 @@ static void ack_apic_edge(unsigned int irq)
 
 atomic_t irq_mis_count;
 
-static int use_eoi_reg __read_mostly;
-
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
 
 	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		if (irq_remapped(irq))
-			io_apic_eoi(entry->apic, entry->pin);
-		else
-			io_apic_eoi(entry->apic, cfg->vector);
+		if (mp_ioapics[entry->apic].apicver >= 0x20) {
+			/*
+			 * Intr-remapping uses pin number as the virtual vector
+			 * in the RTE. Actual vector is programmed in
+			 * intr-remapping table entry. Hence for the io-apic
+			 * EOI we use the pin number.
+			 */
+			if (irq_remapped(irq))
+				io_apic_eoi(entry->apic, entry->pin);
+			else
+				io_apic_eoi(entry->apic, cfg->vector);
+		} else {
+			__mask_and_edge_IO_APIC_irq(entry);
+			__unmask_and_level_IO_APIC_irq(entry);
+		}
 	}
 }
 
@@ -2520,23 +2551,6 @@ static void eoi_ioapic_irq(struct irq_desc *desc)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static int ioapic_supports_eoi(void)
-{
-	struct pci_dev *root;
-
-	root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
-	if (root && root->vendor == PCI_VENDOR_ID_INTEL &&
-	    mp_ioapics[0].apicver >= 0x2) {
-		use_eoi_reg = 1;
-		printk(KERN_INFO "IO-APIC supports EOI register\n");
-	} else
-		printk(KERN_INFO "IO-APIC doesn't support EOI\n");
-
-	return 0;
-}
-
-fs_initcall(ioapic_supports_eoi);
-
 static void ack_apic_level(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2587,14 +2601,7 @@ static void ack_apic_level(unsigned int irq)
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 
-		if (use_eoi_reg)
-			eoi_ioapic_irq(desc);
-		else {
-			spin_lock(&ioapic_lock);
-			__mask_and_edge_IO_APIC_irq(cfg);
-			__unmask_and_level_IO_APIC_irq(cfg);
-			spin_unlock(&ioapic_lock);
-		}
+		eoi_ioapic_irq(desc);
 	}
 
 	/* Now we can move and renable the irq */

From 1c83995b6c7c6bb795bce80f75fbffb15f78db2d Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 1 Dec 2009 15:31:17 -0800
Subject: [PATCH 28/30] x86, ioapic: Document another case when level irq is
 seen as an edge

In the case when cpu goes offline, fixup_irqs() will forward any
unhandled interrupt on the offlined cpu to the new cpu
destination that is handling the corresponding interrupt. This
interrupt forwarding is done via IPI's. Hence, in this case also
level-triggered io-apic interrupt will be seen as an edge
interrupt in the cpu's APIC IRR.

Document this scenario in the code which handles this case by doing
an explicit EOI to the io-apic to clear remote IRR of the io-apic RTE.

Requested-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Cc: ebiederm@xmission.com
Cc: garyhade@us.ibm.com
LKML-Reference: <20091201233335.143970505@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/io_apic.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 78960a3b0ed..c0b4468683f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2586,6 +2586,19 @@ static void ack_apic_level(unsigned int irq)
 	 * level-triggered interrupt.  We mask the source for the time of the
 	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
 	 * The idea is from Manfred Spraul.  --macro
+	 *
+	 * Also in the case when cpu goes offline, fixup_irqs() will forward
+	 * any unhandled interrupt on the offlined cpu to the new cpu
+	 * destination that is handling the corresponding interrupt. This
+	 * interrupt forwarding is done via IPI's. Hence, in this case also
+	 * level-triggered io-apic interrupt will be seen as an edge
+	 * interrupt in the IRR. And we can't rely on the cpu's EOI
+	 * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+	 * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+	 * supporting EOI register, we do an explicit EOI to clear the
+	 * remote IRR and on IO-APIC's which don't have an EOI register,
+	 * we use the above logic (mask+edge followed by unmask+level) from
+	 * Manfred Spraul to clear the remote IRR.
 	 */
 	cfg = desc->chip_data;
 	i = cfg->vector;
@@ -2597,7 +2610,13 @@ static void ack_apic_level(unsigned int irq)
 	 */
 	ack_APIC_irq();
 
-	/* Tail end of version 0x11 I/O APIC bug workaround */
+	/*
+	 * Tail end of clearing remote IRR bit (either by delivering the EOI
+	 * message via io-apic EOI register write or simulating it using
+	 * mask+edge followed by unnask+level logic) manually when the
+	 * level triggered interrupt is seen as the edge triggered interrupt
+	 * at the cpu.
+	 */
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 

From 6d20792e85187b27ae3d1b76678a2dd7025e8bc2 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 1 Dec 2009 15:31:18 -0800
Subject: [PATCH 29/30] x86: Remove unnecessary mdelay() from
 cpu_disable_common()

fixup_irqs() already has a mdelay(). Remove the extra and
unnecessary mdelay() from cpu_disable_common().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Maciej W. Rozycki <macro@linux-mips.org>
Cc: ebiederm@xmission.com
Cc: garyhade@us.ibm.com
LKML-Reference: <20091201233335.232177348@sbs-t61.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 565ebc65920..324f2a44c22 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu)
 void cpu_disable_common(void)
 {
 	int cpu = smp_processor_id();
-	/*
-	 * HACK:
-	 * Allow any queued timer interrupts to get serviced
-	 * This is only a temporary solution until we cleanup
-	 * fixup_irqs as we do for IA64.
-	 */
-	local_irq_enable();
-	mdelay(1);
 
-	local_irq_disable();
 	remove_siblinginfo(cpu);
 
 	/* It's now safe to remove this processor from the online map */

From 7d1849aff6687a135a8da3a75e32a00e3137a5e2 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Thu, 3 Dec 2009 15:52:44 +0100
Subject: [PATCH 30/30] x86, apic: Enable lapic nmi watchdog on AMD Family 11h

The x86 lapic nmi watchdog does not recognize AMD Family 11h,
resulting in:

  NMI watchdog: CPU not supported

As far as I can see from available documentation (the BKDM),
family 11h looks identical to family 10h as far as the PMU
is concerned.

Extending the check to accept family 11h results in:

  Testing NMI watchdog ... OK.

I've been running with this change on a Turion X2 Ultra ZM-82
laptop for a couple of weeks now without problems.

Signed-off-by: Mikael Pettersson <mikpe@it.uu.se>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: <stable@kernel.org>
LKML-Reference: <19223.53436.931768.278021@pilspetsen.it.uu.se>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index fab786f60ed..898df9719af 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void)
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
 		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
-		    boot_cpu_data.x86 != 16)
+		    boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
 			return;
 		wd_ops = &k7_wd_ops;
 		break;