[GIT pull] x86/apic for v5.11-rc1

From: Thomas Gleixner
Date: Mon Dec 14 2020 - 15:27:25 EST


Linus,

please pull the latest x86/apic branch from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-apic-2020-12-14

up to: 058df195c234: x86/ioapic: Cleanup the timer_works() irqflags mess


Yet another large set of x86 interrupt management updates:

- Simplification and distangling of the MSI related functionality

- Let IO/APIC construct the RTE entries from an MSI message instead of
having IO/APIC specific code in the interrupt remapping drivers

- Make the retrieval of the parent interrupt domain (vector or remap
unit) less hardcoded and use the relevant irqdomain callbacks for
selection.

- Allow the handling of more than 255 CPUs without a virtualized IOMMU
when the hypervisor supports it. This has made been possible by the
above modifications and also simplifies the existing workaround in the
HyperV specific virtual IOMMU.

- Cleanup of the historical timer_works() irq flags related
inconsistencies.

Thanks,

tglx

------------------>
David Woodhouse (22):
x86/apic: Fix x2apic enablement without interrupt remapping
x86/msi: Only use high bits of MSI address for DMAR unit
x86/apic: Always provide irq_compose_msi_msg() method for vector domain
x86/hpet: Move MSI support into hpet.c
x86/ioapic: Generate RTE directly from parent irqchip's MSI message
genirq/irqdomain: Implement get_name() method on irqchip fwnodes
x86/apic: Add select() method on vector irqdomain
iommu/amd: Implement select() method on remapping irqdomain
iommu/vt-d: Implement select() method on remapping irqdomain
iommu/hyper-v: Implement select() method on remapping irqdomain
x86/hpet: Use irq_find_matching_fwspec() to find remapping irqdomain
x86/ioapic: Use irq_find_matching_fwspec() to find remapping irqdomain
x86: Kill all traces of irq_remapping_get_irq_domain()
iommu/vt-d: Simplify intel_irq_remapping_select()
x86/ioapic: Handle Extended Destination ID field in RTE
x86/apic: Support 15 bits of APIC ID in MSI where available
iommu/hyper-v: Disable IRQ pseudo-remapping if 15 bit APIC IDs are available
x86/kvm: Enable 15-bit extension when KVM_FEATURE_MSI_EXT_DEST_ID detected
x86/ioapic: Use I/O-APIC ID for finding irqdomain, not index
iommu/amd: Fix union of bitfields in intcapxt support
iommu/amd: Don't register interrupt remapping irqdomain when IR is disabled
iommu/amd: Fix IOMMU interrupt generation in X2APIC mode

Dexuan Cui (2):
x86/hyperv: Enable 15-bit APIC ID if the hypervisor supports it
iommu/hyper-v: Remove I/O-APIC ID check from hyperv_irq_remapping_select()

Thomas Gleixner (18):
x86/apic/uv: Fix inconsistent destination mode
x86/devicetree: Fix the ioapic interrupt type table
x86/apic: Cleanup delivery mode defines
x86/apic: Replace pointless apic:: Dest_logical usage
x86/apic: Get rid of apic:: Dest_logical
x86/apic: Cleanup destination mode
genirq/msi: Allow shadow declarations of msi_msg:: $member
x86/msi: Provide msi message shadow structs
iommu/intel: Use msi_msg shadow structs
iommu/amd: Use msi_msg shadow structs
PCI: vmd: Use msi_msg shadow structs
x86/kvm: Use msi_msg shadow structs
x86/pci/xen: Use msi_msg shadow structs
x86/msi: Remove msidef.h
x86/io_apic: Cleanup trigger/polarity helpers
x86/ioapic: Cleanup IO/APIC route entry structs
x86/ioapic: Correct the PCI/ISA trigger type selection
x86/ioapic: Cleanup the timer_works() irqflags mess


arch/x86/include/asm/apic.h | 16 +-
arch/x86/include/asm/apicdef.h | 16 +-
arch/x86/include/asm/hpet.h | 11 -
arch/x86/include/asm/hw_irq.h | 14 +-
arch/x86/include/asm/hyperv-tlfs.h | 7 +
arch/x86/include/asm/io_apic.h | 79 ++---
arch/x86/include/asm/irq_remapping.h | 9 -
arch/x86/include/asm/irqdomain.h | 3 +
arch/x86/include/asm/msi.h | 50 ++++
arch/x86/include/asm/msidef.h | 57 ----
arch/x86/include/asm/x86_init.h | 2 +
arch/x86/kernel/apic/apic.c | 73 ++++-
arch/x86/kernel/apic/apic_flat_64.c | 18 +-
arch/x86/kernel/apic/apic_noop.c | 10 +-
arch/x86/kernel/apic/apic_numachip.c | 16 +-
arch/x86/kernel/apic/bigsmp_32.c | 9 +-
arch/x86/kernel/apic/io_apic.c | 525 +++++++++++++++++-----------------
arch/x86/kernel/apic/ipi.c | 6 +-
arch/x86/kernel/apic/msi.c | 153 +---------
arch/x86/kernel/apic/probe_32.c | 9 +-
arch/x86/kernel/apic/vector.c | 49 ++++
arch/x86/kernel/apic/x2apic_cluster.c | 10 +-
arch/x86/kernel/apic/x2apic_phys.c | 17 +-
arch/x86/kernel/apic/x2apic_uv_x.c | 12 +-
arch/x86/kernel/cpu/mshyperv.c | 29 ++
arch/x86/kernel/devicetree.c | 30 +-
arch/x86/kernel/hpet.c | 122 +++++++-
arch/x86/kernel/kvm.c | 6 +
arch/x86/kernel/smpboot.c | 8 +-
arch/x86/kernel/x86_init.c | 1 +
arch/x86/kvm/irq_comm.c | 31 +-
arch/x86/pci/intel_mid_pci.c | 8 +-
arch/x86/pci/xen.c | 26 +-
arch/x86/platform/uv/uv_irq.c | 4 +-
arch/x86/xen/apic.c | 7 +-
drivers/iommu/amd/amd_iommu_types.h | 2 +-
drivers/iommu/amd/init.c | 225 ++++++++++-----
drivers/iommu/amd/iommu.c | 93 +++---
drivers/iommu/hyperv-iommu.c | 44 +--
drivers/iommu/intel/irq_remapping.c | 102 +++----
drivers/iommu/irq_remapping.c | 14 -
drivers/iommu/irq_remapping.h | 3 -
drivers/pci/controller/pci-hyperv.c | 6 +-
drivers/pci/controller/vmd.c | 9 +-
include/asm-generic/msi.h | 4 +
include/linux/msi.h | 46 ++-
kernel/irq/irqdomain.c | 11 +-
47 files changed, 1056 insertions(+), 946 deletions(-)
delete mode 100644 arch/x86/include/asm/msidef.h

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 4e3099d9ae62..34cb3c159481 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -259,6 +259,7 @@ static inline u64 native_x2apic_icr_read(void)

extern int x2apic_mode;
extern int x2apic_phys;
+extern void __init x2apic_set_max_apicid(u32 apicid);
extern void __init check_x2apic(void);
extern void x2apic_setup(void);
static inline int x2apic_enabled(void)
@@ -305,11 +306,10 @@ struct apic {
void (*send_IPI_all)(int vector);
void (*send_IPI_self)(int vector);

- /* dest_logical is used by the IPI functions */
- u32 dest_logical;
u32 disable_esr;
- u32 irq_delivery_mode;
- u32 irq_dest_mode;
+
+ enum apic_delivery_modes delivery_mode;
+ bool dest_mode_logical;

u32 (*calc_dest_apicid)(unsigned int cpu);

@@ -520,12 +520,10 @@ static inline void apic_smt_update(void) { }
#endif

struct msi_msg;
+struct irq_cfg;

-#ifdef CONFIG_PCI_MSI
-void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg);
-#else
-# define x86_vector_msi_compose_msg NULL
-#endif
+extern void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
+ bool dmar);

extern void ioapic_zap_locks(void);

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 05e694ed8386..5716f22f81ac 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -432,15 +432,13 @@ struct local_apic {
#define BAD_APICID 0xFFFFu
#endif

-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
+enum apic_delivery_modes {
+ APIC_DELIVERY_MODE_FIXED = 0,
+ APIC_DELIVERY_MODE_LOWESTPRIO = 1,
+ APIC_DELIVERY_MODE_SMI = 2,
+ APIC_DELIVERY_MODE_NMI = 4,
+ APIC_DELIVERY_MODE_INIT = 5,
+ APIC_DELIVERY_MODE_EXTINT = 7,
};

#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 6352dee37cda..ab9f3dd87c80 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -74,17 +74,6 @@ extern void hpet_disable(void);
extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);

-struct irq_data;
-struct hpet_channel;
-struct irq_domain;
-
-extern void hpet_msi_unmask(struct irq_data *data);
-extern void hpet_msi_mask(struct irq_data *data);
-extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg);
-extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
-extern int hpet_assign_irq(struct irq_domain *domain,
- struct hpet_channel *hc, int dev_num);
-
#ifdef CONFIG_HPET_EMULATE_RTC

#include <linux/interrupt.h>
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index a4aeeaace040..d465ece58151 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -39,18 +39,16 @@ enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_PCI_MSI,
X86_IRQ_ALLOC_TYPE_PCI_MSIX,
X86_IRQ_ALLOC_TYPE_DMAR,
+ X86_IRQ_ALLOC_TYPE_AMDVI,
X86_IRQ_ALLOC_TYPE_UV,
- X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT,
- X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT,
};

struct ioapic_alloc_info {
- int pin;
- int node;
- u32 trigger : 1;
- u32 polarity : 1;
- u32 valid : 1;
- struct IO_APIC_route_entry *entry;
+ int pin;
+ int node;
+ u32 is_level : 1;
+ u32 active_low : 1;
+ u32 valid : 1;
};

struct uv_alloc_info {
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 0ed20e8bba9e..6bf42aed387e 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -23,6 +23,13 @@
#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A

+#define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081
+#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */
+
+#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082
+/* Support for the extended IOAPIC RTE format */
+#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2)
+
#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000
#define HYPERV_CPUID_MIN 0x40000005
#define HYPERV_CPUID_MAX 0x4000ffff
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index a1a26f6d3aa4..437aa8d00e53 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -13,15 +13,6 @@
* Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
*/

-/* I/O Unit Redirection Table */
-#define IO_APIC_REDIR_VECTOR_MASK 0x000FF
-#define IO_APIC_REDIR_DEST_LOGICAL 0x00800
-#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
-#define IO_APIC_REDIR_SEND_PENDING (1 << 12)
-#define IO_APIC_REDIR_REMOTE_IRR (1 << 14)
-#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
-#define IO_APIC_REDIR_MASKED (1 << 16)
-
/*
* The structure of the IO-APIC:
*/
@@ -65,52 +56,40 @@ union IO_APIC_reg_03 {
};

struct IO_APIC_route_entry {
- __u32 vector : 8,
- delivery_mode : 3, /* 000: FIXED
- * 001: lowest prio
- * 111: ExtINT
- */
- dest_mode : 1, /* 0: physical, 1: logical */
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1, /* 0: edge, 1: level */
- mask : 1, /* 0: enabled, 1: disabled */
- __reserved_2 : 15;
-
- __u32 __reserved_3 : 24,
- dest : 8;
-} __attribute__ ((packed));
-
-struct IR_IO_APIC_route_entry {
- __u64 vector : 8,
- zero : 3,
- index2 : 1,
- delivery_status : 1,
- polarity : 1,
- irr : 1,
- trigger : 1,
- mask : 1,
- reserved : 31,
- format : 1,
- index : 15;
+ union {
+ struct {
+ u64 vector : 8,
+ delivery_mode : 3,
+ dest_mode_logical : 1,
+ delivery_status : 1,
+ active_low : 1,
+ irr : 1,
+ is_level : 1,
+ masked : 1,
+ reserved_0 : 15,
+ reserved_1 : 17,
+ virt_destid_8_14 : 7,
+ destid_0_7 : 8;
+ };
+ struct {
+ u64 ir_shared_0 : 8,
+ ir_zero : 3,
+ ir_index_15 : 1,
+ ir_shared_1 : 5,
+ ir_reserved_0 : 31,
+ ir_format : 1,
+ ir_index_0_14 : 15;
+ };
+ struct {
+ u64 w1 : 32,
+ w2 : 32;
+ };
+ };
} __attribute__ ((packed));

struct irq_alloc_info;
struct ioapic_domain_cfg;

-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
-#define IOAPIC_MASKED 1
-#define IOAPIC_UNMASKED 0
-
-#define IOAPIC_POL_HIGH 0
-#define IOAPIC_POL_LOW 1
-
-#define IOAPIC_DEST_MODE_PHYSICAL 0
-#define IOAPIC_DEST_MODE_LOGICAL 1
-
#define IOAPIC_MAP_ALLOC 0x1
#define IOAPIC_MAP_CHECK 0x2

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index af4a151d70b3..7cc49432187f 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -44,9 +44,6 @@ extern int irq_remapping_reenable(int);
extern int irq_remap_enable_fault_handling(void);
extern void panic_if_irq_remap(const char *msg);

-extern struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info);
-
/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
extern struct irq_domain *
arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id);
@@ -71,11 +68,5 @@ static inline void panic_if_irq_remap(const char *msg)
{
}

-static inline struct irq_domain *
-irq_remapping_get_irq_domain(struct irq_alloc_info *info)
-{
- return NULL;
-}
-
#endif /* CONFIG_IRQ_REMAP */
#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index cd684d45cb5f..125c23b7bad3 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -12,6 +12,9 @@ enum {
X86_IRQ_ALLOC_LEGACY = 0x2,
};

+extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec);
+extern int x86_fwspec_is_hpet(struct irq_fwspec *fwspec);
+
extern struct irq_domain *x86_vector_domain;

extern void init_irq_alloc_info(struct irq_alloc_info *info,
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index cd30013d15d3..b85147d75626 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -9,4 +9,54 @@ typedef struct irq_alloc_info msi_alloc_info_t;
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg);

+/* Structs and defines for the X86 specific MSI message format */
+
+typedef struct x86_msi_data {
+ u32 vector : 8,
+ delivery_mode : 3,
+ dest_mode_logical : 1,
+ reserved : 2,
+ active_low : 1,
+ is_level : 1;
+
+ u32 dmar_subhandle;
+} __attribute__ ((packed)) arch_msi_msg_data_t;
+#define arch_msi_msg_data x86_msi_data
+
+typedef struct x86_msi_addr_lo {
+ union {
+ struct {
+ u32 reserved_0 : 2,
+ dest_mode_logical : 1,
+ redirect_hint : 1,
+ reserved_1 : 1,
+ virt_destid_8_14 : 7,
+ destid_0_7 : 8,
+ base_address : 12;
+ };
+ struct {
+ u32 dmar_reserved_0 : 2,
+ dmar_index_15 : 1,
+ dmar_subhandle_valid : 1,
+ dmar_format : 1,
+ dmar_index_0_14 : 15,
+ dmar_base_address : 12;
+ };
+ };
+} __attribute__ ((packed)) arch_msi_msg_addr_lo_t;
+#define arch_msi_msg_addr_lo x86_msi_addr_lo
+
+#define X86_MSI_BASE_ADDRESS_LOW (0xfee00000 >> 20)
+
+typedef struct x86_msi_addr_hi {
+ u32 reserved : 8,
+ destid_8_31 : 24;
+} __attribute__ ((packed)) arch_msi_msg_addr_hi_t;
+#define arch_msi_msg_addr_hi x86_msi_addr_hi
+
+#define X86_MSI_BASE_ADDRESS_HIGH (0)
+
+struct msi_msg;
+u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid);
+
#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h
deleted file mode 100644
index ee2f8ccc32d0..000000000000
--- a/arch/x86/include/asm/msidef.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MSIDEF_H
-#define _ASM_X86_MSIDEF_H
-
-/*
- * Constants for Intel APIC based MSI messages.
- */
-
-/*
- * Shifts for MSI data
- */
-
-#define MSI_DATA_VECTOR_SHIFT 0
-#define MSI_DATA_VECTOR_MASK 0x000000ff
-#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \
- MSI_DATA_VECTOR_MASK)
-
-#define MSI_DATA_DELIVERY_MODE_SHIFT 8
-#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
-#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT 14
-#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
-#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT 15
-#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
-#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for msi address
- */
-
-#define MSI_ADDR_BASE_HI 0
-#define MSI_ADDR_BASE_LO 0xfee00000
-
-#define MSI_ADDR_DEST_MODE_SHIFT 2
-#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT)
-#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT 3
-#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
- /* dedicated cpu */
-#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
- /* lowest priority */
-
-#define MSI_ADDR_DEST_ID_SHIFT 12
-#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
-#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
- MSI_ADDR_DEST_ID_MASK)
-#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00)
-
-#define MSI_ADDR_IR_EXT_INT (1 << 4)
-#define MSI_ADDR_IR_SHV (1 << 3)
-#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
-#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
-#endif /* _ASM_X86_MSIDEF_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index dde5b3f1e7cd..5c69f7eb5d47 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -116,6 +116,7 @@ struct x86_init_pci {
* @init_platform: platform setup
* @guest_late_init: guest late init
* @x2apic_available: X2APIC detection
+ * @msi_ext_dest_id: MSI supports 15-bit APIC IDs
* @init_mem_mapping: setup early mappings during init_mem_mapping()
* @init_after_bootmem: guest init after boot allocator is finished
*/
@@ -123,6 +124,7 @@ struct x86_hyper_init {
void (*init_platform)(void);
void (*guest_late_init)(void);
bool (*x2apic_available)(void);
+ bool (*msi_ext_dest_id)(void);
void (*init_mem_mapping)(void);
void (*init_after_bootmem)(void);
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b3eef1d5c903..6bd20c0de8bc 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -93,6 +93,11 @@ static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
*/
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

+/*
+ * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
+ */
+static bool virt_ext_dest_id __ro_after_init;
+
/*
* Map cpu index to physical APIC ID
*/
@@ -1591,7 +1596,7 @@ static void setup_local_APIC(void)
apic->init_apic_ldr();

#ifdef CONFIG_X86_32
- if (apic->dest_logical) {
+ if (apic->dest_mode_logical) {
int logical_apicid, ldr_apicid;

/*
@@ -1841,20 +1846,34 @@ static __init void try_to_enable_x2apic(int remap_mode)
return;

if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
- /* IR is required if there is APIC ID > 255 even when running
- * under KVM
+ u32 apic_limit = 255;
+
+ /*
+ * Using X2APIC without IR is not architecturally supported
+ * on bare metal but may be supported in guests.
*/
- if (max_physical_apicid > 255 ||
- !x86_init.hyper.x2apic_available()) {
+ if (!x86_init.hyper.x2apic_available()) {
pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
x2apic_disable();
return;
}

/*
- * without IR all CPUs can be addressed by IOAPIC/MSI
- * only in physical mode
+ * If the hypervisor supports extended destination ID in
+ * MSI, that increases the maximum APIC ID that can be
+ * used for non-remapped IRQ domains.
*/
+ if (x86_init.hyper.msi_ext_dest_id()) {
+ virt_ext_dest_id = 1;
+ apic_limit = 32767;
+ }
+
+ /*
+ * Without IR, all CPUs can be addressed by IOAPIC/MSI only
+ * in physical mode, and CPUs with an APIC ID that cannnot
+ * be addressed must not be brought online.
+ */
+ x2apic_set_max_apicid(apic_limit);
x2apic_phys = 1;
}
x2apic_enable();
@@ -2478,6 +2497,46 @@ int hard_smp_processor_id(void)
return read_apic_id();
}

+void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
+ bool dmar)
+{
+ memset(msg, 0, sizeof(*msg));
+
+ msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
+ msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF;
+
+ msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ msg->arch_data.vector = cfg->vector;
+
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+ /*
+ * Only the IOMMU itself can use the trick of putting destination
+ * APIC ID into the high bits of the address. Anything else would
+ * just be writing to memory if it tried that, and needs IR to
+ * address APICs which can't be addressed in the normal 32-bit
+ * address range at 0xFFExxxxx. That is typically just 8 bits, but
+ * some hypervisors allow the extended destination ID field in bits
+ * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
+ */
+ if (dmar)
+ msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
+ else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
+ msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
+ else
+ WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
+}
+
+u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
+{
+ u32 dest = msg->arch_addr_lo.destid_0_7;
+
+ if (extid)
+ dest |= msg->arch_addr_hi.destid_8_31 << 8;
+ return dest;
+}
+EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
+
/*
* Override the generic EOI implementation with an optimized version.
* Only called during early boot when only one CPU is active and with
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 7862b152a052..8f72b4351c9f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -53,7 +53,7 @@ static void _flat_send_IPI_mask(unsigned long mask, int vector)
unsigned long flags;

local_irq_save(flags);
- __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}

@@ -113,15 +113,13 @@ static struct apic apic_flat __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 1, /* logical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = true,

.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = flat_init_apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
@@ -206,15 +204,13 @@ static struct apic apic_physflat __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = false,

.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = physflat_init_apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 780c702969b7..fe78319e0f7a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -95,19 +95,15 @@ struct apic apic_noop __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = noop_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- /* logical delivery broadcast to all CPUs: */
- .irq_dest_mode = 1,
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = true,

.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = default_check_apicid_used,

+ .check_apicid_used = default_check_apicid_used,
.init_apic_ldr = noop_init_apic_ldr,
-
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
-
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,

diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 35edd57f064a..a54d817eb4b6 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -246,15 +246,13 @@ static const struct apic apic_numachip1 __refconst = {
.apic_id_valid = numachip_apic_id_valid,
.apic_id_registered = numachip_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = false,

.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = flat_init_apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
@@ -295,15 +293,13 @@ static const struct apic apic_numachip2 __refconst = {
.apic_id_valid = numachip_apic_id_valid,
.apic_id_registered = numachip_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = false,

.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = flat_init_apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 98d015a4405a..77555f66c14d 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -127,16 +127,13 @@ static struct apic apic_bigsmp __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = bigsmp_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- /* phys delivery to target CPU: */
- .irq_dest_mode = 0,
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = false,

.disable_esr = 1,
- .dest_logical = 0,
- .check_apicid_used = bigsmp_check_apicid_used,

+ .check_apicid_used = bigsmp_check_apicid_used,
.init_apic_ldr = bigsmp_init_apic_ldr,
-
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7b3c7e0d4a09..e4ab4804b20d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -48,6 +48,7 @@
#include <linux/jiffies.h> /* time_after() */
#include <linux/slab.h>
#include <linux/memblock.h>
+#include <linux/msi.h>

#include <asm/irqdomain.h>
#include <asm/io.h>
@@ -63,7 +64,6 @@
#include <asm/setup.h>
#include <asm/irq_remapping.h>
#include <asm/hw_irq.h>
-
#include <asm/apic.h>

#define for_each_ioapic(idx) \
@@ -89,12 +89,12 @@ struct irq_pin_list {
};

struct mp_chip_data {
- struct list_head irq_2_pin;
- struct IO_APIC_route_entry entry;
- int trigger;
- int polarity;
+ struct list_head irq_2_pin;
+ struct IO_APIC_route_entry entry;
+ bool is_level;
+ bool active_low;
+ bool isa_irq;
u32 count;
- bool isa_irq;
};

struct mp_ioapic_gsi {
@@ -286,31 +286,26 @@ static void io_apic_write(unsigned int apic, unsigned int reg,
writel(value, &io_apic->data);
}

-union entry_union {
- struct { u32 w1, w2; };
- struct IO_APIC_route_entry entry;
-};
-
static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
{
- union entry_union eu;
+ struct IO_APIC_route_entry entry;

- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ entry.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ entry.w2 = io_apic_read(apic, 0x11 + 2 * pin);

- return eu.entry;
+ return entry;
}

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
- union entry_union eu;
+ struct IO_APIC_route_entry entry;
unsigned long flags;

raw_spin_lock_irqsave(&ioapic_lock, flags);
- eu.entry = __ioapic_read_entry(apic, pin);
+ entry = __ioapic_read_entry(apic, pin);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);

- return eu.entry;
+ return entry;
}

/*
@@ -321,11 +316,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
*/
static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- union entry_union eu = {{0, 0}};
-
- eu.entry = e;
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, e.w2);
+ io_apic_write(apic, 0x10 + 2*pin, e.w1);
}

static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
@@ -344,12 +336,12 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
*/
static void ioapic_mask_entry(int apic, int pin)
{
+ struct IO_APIC_route_entry e = { .masked = true };
unsigned long flags;
- union entry_union eu = { .entry.mask = IOAPIC_MASKED };

raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, e.w1);
+ io_apic_write(apic, 0x11 + 2*pin, e.w2);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

@@ -422,20 +414,15 @@ static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
add_pin_to_irq_node(data, node, newapic, newpin);
}

-static void io_apic_modify_irq(struct mp_chip_data *data,
- int mask_and, int mask_or,
+static void io_apic_modify_irq(struct mp_chip_data *data, bool masked,
void (*final)(struct irq_pin_list *entry))
{
- union entry_union eu;
struct irq_pin_list *entry;

- eu.entry = data->entry;
- eu.w1 &= mask_and;
- eu.w1 |= mask_or;
- data->entry = eu.entry;
+ data->entry.masked = masked;

for_each_irq_pin(entry, data->irq_2_pin) {
- io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1);
+ io_apic_write(entry->apic, 0x10 + 2 * entry->pin, data->entry.w1);
if (final)
final(entry);
}
@@ -459,13 +446,13 @@ static void mask_ioapic_irq(struct irq_data *irq_data)
unsigned long flags;

raw_spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ io_apic_modify_irq(data, true, &io_apic_sync);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}

static void __unmask_ioapic(struct mp_chip_data *data)
{
- io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL);
+ io_apic_modify_irq(data, false, NULL);
}

static void unmask_ioapic_irq(struct irq_data *irq_data)
@@ -506,8 +493,8 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector)
/*
* Mask the entry and change the trigger mode to edge.
*/
- entry1.mask = IOAPIC_MASKED;
- entry1.trigger = IOAPIC_EDGE;
+ entry1.masked = true;
+ entry1.is_level = false;

__ioapic_write_entry(apic, pin, entry1);

@@ -535,15 +522,15 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)

/* Check delivery_mode to be sure we're not clearing an SMI pin */
entry = ioapic_read_entry(apic, pin);
- if (entry.delivery_mode == dest_SMI)
+ if (entry.delivery_mode == APIC_DELIVERY_MODE_SMI)
return;

/*
* Make sure the entry is masked and re-read the contents to check
* if it is a level triggered pin and if the remote-IRR is set.
*/
- if (entry.mask == IOAPIC_UNMASKED) {
- entry.mask = IOAPIC_MASKED;
+ if (!entry.masked) {
+ entry.masked = true;
ioapic_write_entry(apic, pin, entry);
entry = ioapic_read_entry(apic, pin);
}
@@ -556,8 +543,8 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
* doesn't clear the remote-IRR if the trigger mode is not
* set to level.
*/
- if (entry.trigger == IOAPIC_EDGE) {
- entry.trigger = IOAPIC_LEVEL;
+ if (!entry.is_level) {
+ entry.is_level = true;
ioapic_write_entry(apic, pin, entry);
}
raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -659,8 +646,8 @@ void mask_ioapic_entries(void)
struct IO_APIC_route_entry entry;

entry = ioapics[apic].saved_registers[pin];
- if (entry.mask == IOAPIC_UNMASKED) {
- entry.mask = IOAPIC_MASKED;
+ if (!entry.masked) {
+ entry.masked = true;
ioapic_write_entry(apic, pin, entry);
}
}
@@ -745,44 +732,7 @@ static int __init find_isa_irq_apic(int irq, int type)
return -1;
}

-#ifdef CONFIG_EISA
-/*
- * EISA Edge/Level control register, ELCR
- */
-static int EISA_ELCR(unsigned int irq)
-{
- if (irq < nr_legacy_irqs()) {
- unsigned int port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
- }
- apic_printk(APIC_VERBOSE, KERN_INFO
- "Broken MPtable reports ISA irq %d\n", irq);
- return 0;
-}
-
-#endif
-
-/* ISA interrupts are always active high edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (IOAPIC_EDGE)
-#define default_ISA_polarity(idx) (IOAPIC_POL_HIGH)
-
-/* EISA interrupts are always polarity zero and can be edge or level
- * trigger depending on the ELCR value. If an interrupt is listed as
- * EISA conforming in the MP table, that means its trigger type must
- * be read in from the ELCR */
-
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
-#define default_EISA_polarity(idx) default_ISA_polarity(idx)
-
-/* PCI interrupts are always active low level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_PCI_trigger(idx) (IOAPIC_LEVEL)
-#define default_PCI_polarity(idx) (IOAPIC_POL_LOW)
-
-static int irq_polarity(int idx)
+static bool irq_active_low(int idx)
{
int bus = mp_irqs[idx].srcbus;

@@ -791,90 +741,139 @@ static int irq_polarity(int idx)
*/
switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) {
case MP_IRQPOL_DEFAULT:
- /* conforms to spec, ie. bus-type dependent polarity */
- if (test_bit(bus, mp_bus_not_pci))
- return default_ISA_polarity(idx);
- else
- return default_PCI_polarity(idx);
+ /*
+ * Conforms to spec, ie. bus-type dependent polarity. PCI
+ * defaults to low active. [E]ISA defaults to high active.
+ */
+ return !test_bit(bus, mp_bus_not_pci);
case MP_IRQPOL_ACTIVE_HIGH:
- return IOAPIC_POL_HIGH;
+ return false;
case MP_IRQPOL_RESERVED:
pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
fallthrough;
case MP_IRQPOL_ACTIVE_LOW:
default: /* Pointless default required due to do gcc stupidity */
- return IOAPIC_POL_LOW;
+ return true;
}
}

#ifdef CONFIG_EISA
-static int eisa_irq_trigger(int idx, int bus, int trigger)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static bool EISA_ELCR(unsigned int irq)
+{
+ if (irq < nr_legacy_irqs()) {
+ unsigned int port = 0x4d0 + (irq >> 3);
+ return (inb(port) >> (irq & 7)) & 1;
+ }
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return false;
+}
+
+/*
+ * EISA interrupts are always active high and can be edge or level
+ * triggered depending on the ELCR value. If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must be
+ * read in from the ELCR.
+ */
+static bool eisa_irq_is_level(int idx, int bus, bool level)
{
switch (mp_bus_id_to_type[bus]) {
case MP_BUS_PCI:
case MP_BUS_ISA:
- return trigger;
+ return level;
case MP_BUS_EISA:
- return default_EISA_trigger(idx);
+ return EISA_ELCR(mp_irqs[idx].srcbusirq);
}
pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus);
- return IOAPIC_LEVEL;
+ return true;
}
#else
-static inline int eisa_irq_trigger(int idx, int bus, int trigger)
+static inline int eisa_irq_is_level(int idx, int bus, bool level)
{
- return trigger;
+ return level;
}
#endif

-static int irq_trigger(int idx)
+static bool irq_is_level(int idx)
{
int bus = mp_irqs[idx].srcbus;
- int trigger;
+ bool level;

/*
* Determine IRQ trigger mode (edge or level sensitive):
*/
switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) {
case MP_IRQTRIG_DEFAULT:
- /* conforms to spec, ie. bus-type dependent trigger mode */
- if (test_bit(bus, mp_bus_not_pci))
- trigger = default_ISA_trigger(idx);
- else
- trigger = default_PCI_trigger(idx);
+ /*
+ * Conforms to spec, ie. bus-type dependent trigger
+ * mode. PCI defaults to level, ISA to edge.
+ */
+ level = !test_bit(bus, mp_bus_not_pci);
/* Take EISA into account */
- return eisa_irq_trigger(idx, bus, trigger);
+ return eisa_irq_is_level(idx, bus, level);
case MP_IRQTRIG_EDGE:
- return IOAPIC_EDGE;
+ return false;
case MP_IRQTRIG_RESERVED:
pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
fallthrough;
case MP_IRQTRIG_LEVEL:
default: /* Pointless default required due to do gcc stupidity */
- return IOAPIC_LEVEL;
+ return true;
}
}

+static int __acpi_get_override_irq(u32 gsi, bool *trigger, bool *polarity)
+{
+ int ioapic, pin, idx;
+
+ if (skip_ioapic_setup)
+ return -1;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -1;
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ if (pin < 0)
+ return -1;
+
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if (idx < 0)
+ return -1;
+
+ *trigger = irq_is_level(idx);
+ *polarity = irq_active_low(idx);
+ return 0;
+}
+
+#ifdef CONFIG_ACPI
+int acpi_get_override_irq(u32 gsi, int *is_level, int *active_low)
+{
+ *is_level = *active_low = 0;
+ return __acpi_get_override_irq(gsi, (bool *)is_level,
+ (bool *)active_low);
+}
+#endif
+
void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node,
int trigger, int polarity)
{
init_irq_alloc_info(info, NULL);
info->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
info->ioapic.node = node;
- info->ioapic.trigger = trigger;
- info->ioapic.polarity = polarity;
+ info->ioapic.is_level = trigger;
+ info->ioapic.active_low = polarity;
info->ioapic.valid = 1;
}

-#ifndef CONFIG_ACPI
-int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
-#endif
-
static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst,
struct irq_alloc_info *src,
u32 gsi, int ioapic_idx, int pin)
{
- int trigger, polarity;
+ bool level, pol_low;

copy_irq_alloc_info(dst, src);
dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
@@ -883,20 +882,20 @@ static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst,
dst->ioapic.valid = 1;
if (src && src->ioapic.valid) {
dst->ioapic.node = src->ioapic.node;
- dst->ioapic.trigger = src->ioapic.trigger;
- dst->ioapic.polarity = src->ioapic.polarity;
+ dst->ioapic.is_level = src->ioapic.is_level;
+ dst->ioapic.active_low = src->ioapic.active_low;
} else {
dst->ioapic.node = NUMA_NO_NODE;
- if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) {
- dst->ioapic.trigger = trigger;
- dst->ioapic.polarity = polarity;
+ if (__acpi_get_override_irq(gsi, &level, &pol_low) >= 0) {
+ dst->ioapic.is_level = level;
+ dst->ioapic.active_low = pol_low;
} else {
/*
* PCI interrupts are always active low level
* triggered.
*/
- dst->ioapic.trigger = IOAPIC_LEVEL;
- dst->ioapic.polarity = IOAPIC_POL_LOW;
+ dst->ioapic.is_level = true;
+ dst->ioapic.active_low = true;
}
}
}
@@ -906,12 +905,12 @@ static int ioapic_alloc_attr_node(struct irq_alloc_info *info)
return (info && info->ioapic.valid) ? info->ioapic.node : NUMA_NO_NODE;
}

-static void mp_register_handler(unsigned int irq, unsigned long trigger)
+static void mp_register_handler(unsigned int irq, bool level)
{
irq_flow_handler_t hdl;
bool fasteoi;

- if (trigger) {
+ if (level) {
irq_set_status_flags(irq, IRQ_LEVEL);
fasteoi = true;
} else {
@@ -933,14 +932,14 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
* pin with real trigger and polarity attributes.
*/
if (irq < nr_legacy_irqs() && data->count == 1) {
- if (info->ioapic.trigger != data->trigger)
- mp_register_handler(irq, info->ioapic.trigger);
- data->entry.trigger = data->trigger = info->ioapic.trigger;
- data->entry.polarity = data->polarity = info->ioapic.polarity;
+ if (info->ioapic.is_level != data->is_level)
+ mp_register_handler(irq, info->ioapic.is_level);
+ data->entry.is_level = data->is_level = info->ioapic.is_level;
+ data->entry.active_low = data->active_low = info->ioapic.active_low;
}

- return data->trigger == info->ioapic.trigger &&
- data->polarity == info->ioapic.polarity;
+ return data->is_level == info->ioapic.is_level &&
+ data->active_low == info->ioapic.active_low;
}

static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
@@ -1219,10 +1218,9 @@ void ioapic_zap_locks(void)

static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
{
- int i;
- char buf[256];
struct IO_APIC_route_entry entry;
- struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry;
+ char buf[256];
+ int i;

printk(KERN_DEBUG "IOAPIC %d:\n", apic);
for (i = 0; i <= nr_entries; i++) {
@@ -1230,20 +1228,21 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
snprintf(buf, sizeof(buf),
" pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
i,
- entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ",
- entry.trigger == IOAPIC_LEVEL ? "level" : "edge ",
- entry.polarity == IOAPIC_POL_LOW ? "low " : "high",
+ entry.masked ? "disabled" : "enabled ",
+ entry.is_level ? "level" : "edge ",
+ entry.active_low ? "low " : "high",
entry.vector, entry.irr, entry.delivery_status);
- if (ir_entry->format)
+ if (entry.ir_format) {
printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n",
- buf, (ir_entry->index2 << 15) | ir_entry->index,
- ir_entry->zero);
- else
- printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n",
buf,
- entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ?
- "logical " : "physical",
- entry.dest, entry.delivery_mode);
+ (entry.ir_index_15 << 15) | entry.ir_index_0_14,
+ entry.ir_zero);
+ } else {
+ printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf,
+ entry.dest_mode_logical ? "logical " : "physical",
+ entry.virt_destid_8_14, entry.destid_0_7,
+ entry.delivery_mode);
+ }
}
}

@@ -1368,7 +1367,8 @@ void __init enable_IO_APIC(void)
/* If the interrupt line is enabled and in ExtInt mode
* I have found the pin where the i8259 is connected.
*/
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+ if (!entry.masked &&
+ entry.delivery_mode == APIC_DELIVERY_MODE_EXTINT) {
ioapic_i8259.apic = apic;
ioapic_i8259.pin = pin;
goto found_i8259;
@@ -1410,14 +1410,16 @@ void native_restore_boot_irq_mode(void)
*/
if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
+ u32 apic_id = read_apic_id();

memset(&entry, 0, sizeof(entry));
- entry.mask = IOAPIC_UNMASKED;
- entry.trigger = IOAPIC_EDGE;
- entry.polarity = IOAPIC_POL_HIGH;
- entry.dest_mode = IOAPIC_DEST_MODE_PHYSICAL;
- entry.delivery_mode = dest_ExtINT;
- entry.dest = read_apic_id();
+ entry.masked = false;
+ entry.is_level = false;
+ entry.active_low = false;
+ entry.dest_mode_logical = false;
+ entry.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
+ entry.destid_0_7 = apic_id & 0xFF;
+ entry.virt_destid_8_14 = apic_id >> 8;

/*
* Add it to the IO-APIC irq-routing table:
@@ -1618,21 +1620,16 @@ static void __init delay_without_tsc(void)
static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- unsigned long flags;

if (no_timer_check)
return 1;

- local_save_flags(flags);
local_irq_enable();
-
if (boot_cpu_has(X86_FEATURE_TSC))
delay_with_tsc();
else
delay_without_tsc();

- local_irq_restore(flags);
-
/*
* Expect a few ticks at least, to be sure some possible
* glue logic does not lock up after one or two first
@@ -1641,10 +1638,10 @@ static int __init timer_irq_works(void)
* least one tick may be lost due to delays.
*/

- /* jiffies wrap? */
- if (time_after(jiffies, t1 + 4))
- return 1;
- return 0;
+ local_irq_disable();
+
+ /* Did jiffies advance? */
+ return time_after(jiffies, t1 + 4);
}

/*
@@ -1696,13 +1693,13 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)

raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, data->irq_2_pin) {
- unsigned int reg;
+ struct IO_APIC_route_entry e;
int pin;

pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ e.w1 = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
- if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+ if (e.irr) {
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return true;
}
@@ -1849,21 +1846,62 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data)
eoi_ioapic_pin(data->entry.vector, data);
}

+/*
+ * The I/OAPIC is just a device for generating MSI messages from legacy
+ * interrupt pins. Various fields of the RTE translate into bits of the
+ * resulting MSI which had a historical meaning.
+ *
+ * With interrupt remapping, many of those bits have different meanings
+ * in the underlying MSI, but the way that the I/OAPIC transforms them
+ * from its RTE to the MSI message is the same. This function allows
+ * the parent IRQ domain to compose the MSI message, then takes the
+ * relevant bits to put them in the appropriate places in the RTE in
+ * order to generate that message when the IRQ happens.
+ *
+ * The setup here relies on a preconfigured route entry (is_level,
+ * active_low, masked) because the parent domain is merely composing the
+ * generic message routing information which is used for the MSI.
+ */
+static void ioapic_setup_msg_from_msi(struct irq_data *irq_data,
+ struct IO_APIC_route_entry *entry)
+{
+ struct msi_msg msg;
+
+ /* Let the parent domain compose the MSI message */
+ irq_chip_compose_msi_msg(irq_data, &msg);
+
+ /*
+ * - Real vector
+ * - DMAR/IR: 8bit subhandle (ioapic.pin)
+ * - AMD/IR: 8bit IRTE index
+ */
+ entry->vector = msg.arch_data.vector;
+ /* Delivery mode (for DMAR/IR all 0) */
+ entry->delivery_mode = msg.arch_data.delivery_mode;
+ /* Destination mode or DMAR/IR index bit 15 */
+ entry->dest_mode_logical = msg.arch_addr_lo.dest_mode_logical;
+ /* DMAR/IR: 1, 0 for all other modes */
+ entry->ir_format = msg.arch_addr_lo.dmar_format;
+ /*
+ * - DMAR/IR: index bit 0-14.
+ *
+ * - Virt: If the host supports x2apic without a virtualized IR
+ * unit then bit 0-6 of dmar_index_0_14 are providing bit
+ * 8-14 of the destination id.
+ *
+ * All other modes have bit 0-6 of dmar_index_0_14 cleared and the
+ * topmost 8 bits are destination id bit 0-7 (entry::destid_0_7).
+ */
+ entry->ir_index_0_14 = msg.arch_addr_lo.dmar_index_0_14;
+}
+
static void ioapic_configure_entry(struct irq_data *irqd)
{
struct mp_chip_data *mpd = irqd->chip_data;
- struct irq_cfg *cfg = irqd_cfg(irqd);
struct irq_pin_list *entry;

- /*
- * Only update when the parent is the vector domain, don't touch it
- * if the parent is the remapping domain. Check the installed
- * ioapic chip to verify that.
- */
- if (irqd->chip == &ioapic_chip) {
- mpd->entry.dest = cfg->dest_apicid;
- mpd->entry.vector = cfg->vector;
- }
+ ioapic_setup_msg_from_msi(irqd, &mpd->entry);
+
for_each_irq_pin(entry, mpd->irq_2_pin)
__ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
}
@@ -1919,7 +1957,7 @@ static int ioapic_irq_get_chip_state(struct irq_data *irqd,
* irrelevant because the IO-APIC treats them as fire and
* forget.
*/
- if (rentry.irr && rentry.trigger) {
+ if (rentry.irr && rentry.is_level) {
*state = true;
break;
}
@@ -2027,6 +2065,7 @@ static inline void __init unlock_ExtINT_logic(void)
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
+ u32 apic_id;

pin = find_isa_irq_pin(8, mp_INT);
if (pin == -1) {
@@ -2042,14 +2081,16 @@ static inline void __init unlock_ExtINT_logic(void)
entry0 = ioapic_read_entry(apic, pin);
clear_IO_APIC_pin(apic, pin);

+ apic_id = hard_smp_processor_id();
memset(&entry1, 0, sizeof(entry1));

- entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL;
- entry1.mask = IOAPIC_UNMASKED;
- entry1.dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = IOAPIC_EDGE;
+ entry1.dest_mode_logical = true;
+ entry1.masked = false;
+ entry1.destid_0_7 = apic_id & 0xFF;
+ entry1.virt_destid_8_14 = apic_id >> 8;
+ entry1.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
+ entry1.active_low = entry0.active_low;
+ entry1.is_level = false;
entry1.vector = 0;

ioapic_write_entry(apic, pin, entry1);
@@ -2117,13 +2158,12 @@ static inline void __init check_timer(void)
struct irq_cfg *cfg = irqd_cfg(irq_data);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
- unsigned long flags;
int no_pin1 = 0;

if (!global_clock_event)
return;

- local_irq_save(flags);
+ local_irq_disable();

/*
* get/set the timer IRQ vector:
@@ -2178,9 +2218,9 @@ static inline void __init check_timer(void)
* so only need to unmask if it is level-trigger
* do we really have level trigger timer?
*/
- int idx;
- idx = find_irq_entry(apic1, pin1, mp_INT);
- if (idx != -1 && irq_trigger(idx))
+ int idx = find_irq_entry(apic1, pin1, mp_INT);
+
+ if (idx != -1 && irq_is_level(idx))
unmask_ioapic_irq(irq_get_irq_data(0));
}
irq_domain_deactivate_irq(irq_data);
@@ -2191,7 +2231,6 @@ static inline void __init check_timer(void)
goto out;
}
panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
- local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2215,7 +2254,6 @@ static inline void __init check_timer(void)
/*
* Cleanup, just in case ...
*/
- local_irq_disable();
legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -2232,7 +2270,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -2251,7 +2288,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
- local_irq_disable();
apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
if (apic_is_x2apic_enabled())
apic_printk(APIC_QUIET, KERN_INFO
@@ -2260,7 +2296,7 @@ static inline void __init check_timer(void)
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
"report. Then try booting with the 'noapic' option.\n");
out:
- local_irq_restore(flags);
+ local_irq_enable();
}

/*
@@ -2284,36 +2320,37 @@ static inline void __init check_timer(void)

static int mp_irqdomain_create(int ioapic)
{
- struct irq_alloc_info info;
struct irq_domain *parent;
int hwirqs = mp_ioapic_pin_count(ioapic);
struct ioapic *ip = &ioapics[ioapic];
struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
struct fwnode_handle *fn;
- char *name = "IO-APIC";
+ struct irq_fwspec fwspec;

if (cfg->type == IOAPIC_DOMAIN_INVALID)
return 0;

- init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT;
- info.devid = mpc_ioapic_id(ioapic);
- parent = irq_remapping_get_irq_domain(&info);
- if (!parent)
- parent = x86_vector_domain;
- else
- name = "IO-APIC-IR";
-
/* Handle device tree enumerated APICs proper */
if (cfg->dev) {
fn = of_node_to_fwnode(cfg->dev);
} else {
- fn = irq_domain_alloc_named_id_fwnode(name, ioapic);
+ fn = irq_domain_alloc_named_id_fwnode("IO-APIC", mpc_ioapic_id(ioapic));
if (!fn)
return -ENOMEM;
}

+ fwspec.fwnode = fn;
+ fwspec.param_count = 1;
+ fwspec.param[0] = mpc_ioapic_id(ioapic);
+
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
+ if (!parent) {
+ if (!cfg->dev)
+ irq_domain_free_fwnode(fn);
+ return -ENODEV;
+ }
+
ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
(void *)(long)ioapic);

@@ -2587,30 +2624,6 @@ static int io_apic_get_version(int ioapic)
return reg_01.bits.version;
}

-int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
-{
- int ioapic, pin, idx;
-
- if (skip_ioapic_setup)
- return -1;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return -1;
-
- pin = mp_find_ioapic_pin(ioapic, gsi);
- if (pin < 0)
- return -1;
-
- idx = find_irq_entry(ioapic, pin, mp_INT);
- if (idx < 0)
- return -1;
-
- *trigger = irq_trigger(idx);
- *polarity = irq_polarity(idx);
- return 0;
-}
-
/*
* This function updates target affinity of IOAPIC interrupts to include
* the CPUs which came online during SMP bringup.
@@ -2934,44 +2947,49 @@ static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,
struct irq_alloc_info *info)
{
if (info && info->ioapic.valid) {
- data->trigger = info->ioapic.trigger;
- data->polarity = info->ioapic.polarity;
- } else if (acpi_get_override_irq(gsi, &data->trigger,
- &data->polarity) < 0) {
+ data->is_level = info->ioapic.is_level;
+ data->active_low = info->ioapic.active_low;
+ } else if (__acpi_get_override_irq(gsi, &data->is_level,
+ &data->active_low) < 0) {
/* PCI interrupts are always active low level triggered. */
- data->trigger = IOAPIC_LEVEL;
- data->polarity = IOAPIC_POL_LOW;
+ data->is_level = true;
+ data->active_low = true;
}
}

-static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data,
- struct IO_APIC_route_entry *entry)
+/*
+ * Configure the I/O-APIC specific fields in the routing entry.
+ *
+ * This is important to setup the I/O-APIC specific bits (is_level,
+ * active_low, masked) because the underlying parent domain will only
+ * provide the routing information and is oblivious of the I/O-APIC
+ * specific bits.
+ *
+ * The entry is just preconfigured at this point and not written into the
+ * RTE. This happens later during activation which will fill in the actual
+ * routing information.
+ */
+static void mp_preconfigure_entry(struct mp_chip_data *data)
{
+ struct IO_APIC_route_entry *entry = &data->entry;
+
memset(entry, 0, sizeof(*entry));
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->dest = cfg->dest_apicid;
- entry->vector = cfg->vector;
- entry->trigger = data->trigger;
- entry->polarity = data->polarity;
+ entry->is_level = data->is_level;
+ entry->active_low = data->active_low;
/*
* Mask level triggered irqs. Edge triggered irqs are masked
* by the irq core code in case they fire.
*/
- if (data->trigger == IOAPIC_LEVEL)
- entry->mask = IOAPIC_MASKED;
- else
- entry->mask = IOAPIC_UNMASKED;
+ entry->masked = data->is_level;
}

int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
- int ret, ioapic, pin;
- struct irq_cfg *cfg;
- struct irq_data *irq_data;
- struct mp_chip_data *data;
struct irq_alloc_info *info = arg;
+ struct mp_chip_data *data;
+ struct irq_data *irq_data;
+ int ret, ioapic, pin;
unsigned long flags;

if (!info || nr_irqs > 1)
@@ -2989,7 +3007,6 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
if (!data)
return -ENOMEM;

- info->ioapic.entry = &data->entry;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
if (ret < 0) {
kfree(data);
@@ -3003,22 +3020,20 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
irq_data->chip_data = data;
mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);

- cfg = irqd_cfg(irq_data);
add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);

+ mp_preconfigure_entry(data);
+ mp_register_handler(virq, data->is_level);
+
local_irq_save(flags);
- if (info->ioapic.entry)
- mp_setup_entry(cfg, data, info->ioapic.entry);
- mp_register_handler(virq, data->trigger);
if (virq < nr_legacy_irqs())
legacy_pic->mask(virq);
local_irq_restore(flags);

apic_printk(APIC_VERBOSE, KERN_DEBUG
- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
- ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector,
- virq, data->trigger, data->polarity, cfg->dest_apicid);
-
+ "IOAPIC[%d]: Preconfigured routing entry (%d-%d -> IRQ %d Level:%i ActiveLow:%i)\n",
+ ioapic, mpc_ioapic_id(ioapic), pin, virq,
+ data->is_level, data->active_low);
return 0;
}

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 387154e39e08..d1fb874fbe64 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -260,7 +260,7 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
for_each_cpu(query_cpu, mask)
__default_send_IPI_dest_field(
early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, apic->dest_logical);
+ vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}

@@ -279,7 +279,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
continue;
__default_send_IPI_dest_field(
early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, apic->dest_logical);
+ vector, APIC_DEST_LOGICAL);
}
local_irq_restore(flags);
}
@@ -297,7 +297,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)

local_irq_save(flags);
WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
- __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+ __default_send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}

diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 6313f0a05db7..44ebe25e7703 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -15,7 +15,6 @@
#include <linux/hpet.h>
#include <linux/msi.h>
#include <asm/irqdomain.h>
-#include <asm/msidef.h>
#include <asm/hpet.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
@@ -23,38 +22,11 @@

struct irq_domain *x86_pci_msi_default_domain __ro_after_init;

-static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
-{
- msg->address_hi = MSI_ADDR_BASE_HI;
-
- if (x2apic_enabled())
- msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
-
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((apic->irq_dest_mode == 0) ?
- MSI_ADDR_DEST_MODE_PHYSICAL :
- MSI_ADDR_DEST_MODE_LOGICAL) |
- MSI_ADDR_REDIRECTION_CPU |
- MSI_ADDR_DEST_ID(cfg->dest_apicid);
-
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- MSI_DATA_DELIVERY_FIXED |
- MSI_DATA_VECTOR(cfg->vector);
-}
-
-void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
-{
- __irq_msi_compose_msg(irqd_cfg(data), msg);
-}
-
static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
{
struct msi_msg msg[2] = { [1] = { }, };

- __irq_msi_compose_msg(cfg, msg);
+ __irq_msi_compose_msg(cfg, msg, false);
irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
}

@@ -276,6 +248,17 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
#endif

#ifdef CONFIG_DMAR_TABLE
+/*
+ * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
+ * high bits of the destination APIC ID. This can't be done in the general
+ * case for MSIs as it would be targeting real memory above 4GiB not the
+ * APIC.
+ */
+static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg, true);
+}
+
static void dmar_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
{
dmar_msi_write(data->irq, msg);
@@ -288,6 +271,7 @@ static struct irq_chip dmar_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_compose_msi_msg = dmar_msi_compose_msg,
.irq_write_msi_msg = dmar_msi_write_msg,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -356,114 +340,3 @@ void dmar_free_hwirq(int irq)
irq_domain_free_irqs(irq, 1);
}
#endif
-
-/*
- * MSI message composition
- */
-#ifdef CONFIG_HPET_TIMER
-static inline int hpet_dev_id(struct irq_domain *domain)
-{
- struct msi_domain_info *info = msi_get_domain_info(domain);
-
- return (int)(long)info->data;
-}
-
-static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
-{
- hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
-}
-
-static struct irq_chip hpet_msi_controller __ro_after_init = {
- .name = "HPET-MSI",
- .irq_unmask = hpet_msi_unmask,
- .irq_mask = hpet_msi_mask,
- .irq_ack = irq_chip_ack_parent,
- .irq_set_affinity = msi_domain_set_affinity,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_write_msi_msg = hpet_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int hpet_msi_init(struct irq_domain *domain,
- struct msi_domain_info *info, unsigned int virq,
- irq_hw_number_t hwirq, msi_alloc_info_t *arg)
-{
- irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
- irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL,
- handle_edge_irq, arg->data, "edge");
-
- return 0;
-}
-
-static void hpet_msi_free(struct irq_domain *domain,
- struct msi_domain_info *info, unsigned int virq)
-{
- irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
-}
-
-static struct msi_domain_ops hpet_msi_domain_ops = {
- .msi_init = hpet_msi_init,
- .msi_free = hpet_msi_free,
-};
-
-static struct msi_domain_info hpet_msi_domain_info = {
- .ops = &hpet_msi_domain_ops,
- .chip = &hpet_msi_controller,
- .flags = MSI_FLAG_USE_DEF_DOM_OPS,
-};
-
-struct irq_domain *hpet_create_irq_domain(int hpet_id)
-{
- struct msi_domain_info *domain_info;
- struct irq_domain *parent, *d;
- struct irq_alloc_info info;
- struct fwnode_handle *fn;
-
- if (x86_vector_domain == NULL)
- return NULL;
-
- domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL);
- if (!domain_info)
- return NULL;
-
- *domain_info = hpet_msi_domain_info;
- domain_info->data = (void *)(long)hpet_id;
-
- init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT;
- info.devid = hpet_id;
- parent = irq_remapping_get_irq_domain(&info);
- if (parent == NULL)
- parent = x86_vector_domain;
- else
- hpet_msi_controller.name = "IR-HPET-MSI";
-
- fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name,
- hpet_id);
- if (!fn) {
- kfree(domain_info);
- return NULL;
- }
-
- d = msi_create_irq_domain(fn, domain_info, parent);
- if (!d) {
- irq_domain_free_fwnode(fn);
- kfree(domain_info);
- }
- return d;
-}
-
-int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
- int dev_num)
-{
- struct irq_alloc_info info;
-
- init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_HPET;
- info.data = hc;
- info.devid = hpet_dev_id(domain);
- info.hwirq = dev_num;
-
- return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
-}
-#endif
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 67b6f7c049ec..a61f642b1b90 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -69,16 +69,13 @@ static struct apic apic_default __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = default_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- /* logical delivery broadcast to all CPUs: */
- .irq_dest_mode = 1,
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = true,

.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = default_check_apicid_used,

+ .check_apicid_used = default_check_apicid_used,
.init_apic_ldr = default_init_apic_ldr,
-
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = setup_apic_flat_routing,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1eac53632786..b9b05caa28a4 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -636,7 +636,50 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
}
#endif

+int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec)
+{
+ if (fwspec->param_count != 1)
+ return 0;
+
+ if (is_fwnode_irqchip(fwspec->fwnode)) {
+ const char *fwname = fwnode_get_name(fwspec->fwnode);
+ return fwname && !strncmp(fwname, "IO-APIC-", 8) &&
+ simple_strtol(fwname+8, NULL, 10) == fwspec->param[0];
+ }
+ return to_of_node(fwspec->fwnode) &&
+ of_device_is_compatible(to_of_node(fwspec->fwnode),
+ "intel,ce4100-ioapic");
+}
+
+int x86_fwspec_is_hpet(struct irq_fwspec *fwspec)
+{
+ if (fwspec->param_count != 1)
+ return 0;
+
+ if (is_fwnode_irqchip(fwspec->fwnode)) {
+ const char *fwname = fwnode_get_name(fwspec->fwnode);
+ return fwname && !strncmp(fwname, "HPET-MSI-", 9) &&
+ simple_strtol(fwname+9, NULL, 10) == fwspec->param[0];
+ }
+ return 0;
+}
+
+static int x86_vector_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ /*
+ * HPET and I/OAPIC cannot be parented in the vector domain
+ * if IRQ remapping is enabled. APIC IDs above 15 bits are
+ * only permitted if IRQ remapping is enabled, so check that.
+ */
+ if (apic->apic_id_valid(32768))
+ return 0;
+
+ return x86_fwspec_is_ioapic(fwspec) || x86_fwspec_is_hpet(fwspec);
+}
+
static const struct irq_domain_ops x86_vector_domain_ops = {
+ .select = x86_vector_select,
.alloc = x86_vector_alloc_irqs,
.free = x86_vector_free_irqs,
.activate = x86_vector_activate,
@@ -818,6 +861,12 @@ void apic_ack_edge(struct irq_data *irqd)
apic_ack_irq(irqd);
}

+static void x86_vector_msi_compose_msg(struct irq_data *data,
+ struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg, false);
+}
+
static struct irq_chip lapic_controller = {
.name = "APIC",
.irq_ack = apic_ack_edge,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index b0889c48a2ac..df6adc5674c9 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -61,7 +61,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
if (!dest)
continue;

- __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
/* Remove cluster CPUs from tmpmask */
cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
}
@@ -184,15 +184,13 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.apic_id_valid = x2apic_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 1, /* logical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = true,

.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = init_x2apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index bc9693841353..0e4e81971567 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -8,6 +8,12 @@
int x2apic_phys;

static struct apic apic_x2apic_phys;
+static u32 x2apic_max_apicid __ro_after_init;
+
+void __init x2apic_set_max_apicid(u32 apicid)
+{
+ x2apic_max_apicid = apicid;
+}

static int __init set_x2apic_phys_mode(char *arg)
{
@@ -98,6 +104,9 @@ static int x2apic_phys_probe(void)
/* Common x2apic functions, also used by x2apic_cluster */
int x2apic_apic_id_valid(u32 apicid)
{
+ if (x2apic_max_apicid && apicid > x2apic_max_apicid)
+ return 0;
+
return 1;
}

@@ -148,15 +157,13 @@ static struct apic apic_x2apic_phys __ro_after_init = {
.apic_id_valid = x2apic_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = false,

.disable_esr = 0,
- .dest_logical = 0,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = init_x2apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 714233cee0b5..de94181f4d0c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -703,9 +703,9 @@ static void uv_send_IPI_one(int cpu, int vector)
unsigned long dmode, val;

if (vector == NMI_VECTOR)
- dmode = dest_NMI;
+ dmode = APIC_DELIVERY_MODE_NMI;
else
- dmode = dest_Fixed;
+ dmode = APIC_DELIVERY_MODE_FIXED;

val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(apicid << UVH_IPI_INT_APIC_ID_SHFT) |
@@ -807,15 +807,13 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.apic_id_valid = uv_apic_id_valid,
.apic_id_registered = uv_apic_id_registered,

- .irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* Physical */
+ .delivery_mode = APIC_DELIVERY_MODE_FIXED,
+ .dest_mode_logical = false,

.disable_esr = 0,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = NULL,

+ .check_apicid_used = NULL,
.init_apic_ldr = uv_init_apic_ldr,
-
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 05ef1f4550cb..f628e3dc150f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -366,9 +366,38 @@ static void __init ms_hyperv_init_platform(void)
#endif
}

+static bool __init ms_hyperv_x2apic_available(void)
+{
+ return x2apic_supported();
+}
+
+/*
+ * If ms_hyperv_msi_ext_dest_id() returns true, hyperv_prepare_irq_remapping()
+ * returns -ENODEV and the Hyper-V IOMMU driver is not used; instead, the
+ * generic support of the 15-bit APIC ID is used: see __irq_msi_compose_msg().
+ *
+ * Note: for a VM on Hyper-V, the I/O-APIC is the only device which
+ * (logically) generates MSIs directly to the system APIC irq domain.
+ * There is no HPET, and PCI MSI/MSI-X interrupts are remapped by the
+ * pci-hyperv host bridge.
+ */
+static bool __init ms_hyperv_msi_ext_dest_id(void)
+{
+ u32 eax;
+
+ eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_INTERFACE);
+ if (eax != HYPERV_VS_INTERFACE_EAX_SIGNATURE)
+ return false;
+
+ eax = cpuid_eax(HYPERV_CPUID_VIRT_STACK_PROPERTIES);
+ return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE;
+}
+
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
.type = X86_HYPER_MS_HYPERV,
+ .init.x2apic_available = ms_hyperv_x2apic_available,
+ .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
.init.init_platform = ms_hyperv_init_platform,
};
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index ddffd80f5c52..6a4cb71c2498 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -184,31 +184,31 @@ static unsigned int ioapic_id;

struct of_ioapic_type {
u32 out_type;
- u32 trigger;
- u32 polarity;
+ u32 is_level;
+ u32 active_low;
};

static struct of_ioapic_type of_ioapic_type[] =
{
{
- .out_type = IRQ_TYPE_EDGE_RISING,
- .trigger = IOAPIC_EDGE,
- .polarity = 1,
+ .out_type = IRQ_TYPE_EDGE_FALLING,
+ .is_level = 0,
+ .active_low = 1,
},
{
- .out_type = IRQ_TYPE_LEVEL_LOW,
- .trigger = IOAPIC_LEVEL,
- .polarity = 0,
+ .out_type = IRQ_TYPE_LEVEL_HIGH,
+ .is_level = 1,
+ .active_low = 0,
},
{
- .out_type = IRQ_TYPE_LEVEL_HIGH,
- .trigger = IOAPIC_LEVEL,
- .polarity = 1,
+ .out_type = IRQ_TYPE_LEVEL_LOW,
+ .is_level = 1,
+ .active_low = 1,
},
{
- .out_type = IRQ_TYPE_EDGE_FALLING,
- .trigger = IOAPIC_EDGE,
- .polarity = 0,
+ .out_type = IRQ_TYPE_EDGE_RISING,
+ .is_level = 0,
+ .active_low = 0,
},
};

@@ -228,7 +228,7 @@ static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
return -EINVAL;

it = &of_ioapic_type[type_index];
- ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
+ ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->is_level, it->active_low);
tmp.devid = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
tmp.ioapic.pin = fwspec->param[0];

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 7a50f0b62a70..08651a4e6aa0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -7,6 +7,7 @@
#include <linux/cpu.h>
#include <linux/irq.h>

+#include <asm/irq_remapping.h>
#include <asm/hpet.h>
#include <asm/time.h>

@@ -50,7 +51,7 @@ unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
bool hpet_msi_disable;

-#ifdef CONFIG_PCI_MSI
+#ifdef CONFIG_GENERIC_MSI_IRQ
static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel);
static struct irq_domain *hpet_domain;
#endif
@@ -467,9 +468,8 @@ static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
/*
* HPET MSI Support
*/
-#ifdef CONFIG_PCI_MSI
-
-void hpet_msi_unmask(struct irq_data *data)
+#ifdef CONFIG_GENERIC_MSI_IRQ
+static void hpet_msi_unmask(struct irq_data *data)
{
struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
@@ -479,7 +479,7 @@ void hpet_msi_unmask(struct irq_data *data)
hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}

-void hpet_msi_mask(struct irq_data *data)
+static void hpet_msi_mask(struct irq_data *data)
{
struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
@@ -489,12 +489,122 @@ void hpet_msi_mask(struct irq_data *data)
hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}

-void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
+static void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
{
hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num));
hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
}

+static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
+}
+
+static struct irq_chip hpet_msi_controller __ro_after_init = {
+ .name = "HPET-MSI",
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_set_affinity = msi_domain_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_write_msi_msg = hpet_msi_write_msg,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int hpet_msi_init(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq,
+ irq_hw_number_t hwirq, msi_alloc_info_t *arg)
+{
+ irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
+ irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL,
+ handle_edge_irq, arg->data, "edge");
+
+ return 0;
+}
+
+static void hpet_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info, unsigned int virq)
+{
+ irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
+}
+
+static struct msi_domain_ops hpet_msi_domain_ops = {
+ .msi_init = hpet_msi_init,
+ .msi_free = hpet_msi_free,
+};
+
+static struct msi_domain_info hpet_msi_domain_info = {
+ .ops = &hpet_msi_domain_ops,
+ .chip = &hpet_msi_controller,
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS,
+};
+
+static struct irq_domain *hpet_create_irq_domain(int hpet_id)
+{
+ struct msi_domain_info *domain_info;
+ struct irq_domain *parent, *d;
+ struct fwnode_handle *fn;
+ struct irq_fwspec fwspec;
+
+ if (x86_vector_domain == NULL)
+ return NULL;
+
+ domain_info = kzalloc(sizeof(*domain_info), GFP_KERNEL);
+ if (!domain_info)
+ return NULL;
+
+ *domain_info = hpet_msi_domain_info;
+ domain_info->data = (void *)(long)hpet_id;
+
+ fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name,
+ hpet_id);
+ if (!fn) {
+ kfree(domain_info);
+ return NULL;
+ }
+
+ fwspec.fwnode = fn;
+ fwspec.param_count = 1;
+ fwspec.param[0] = hpet_id;
+
+ parent = irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
+ if (!parent) {
+ irq_domain_free_fwnode(fn);
+ kfree(domain_info);
+ return NULL;
+ }
+ if (parent != x86_vector_domain)
+ hpet_msi_controller.name = "IR-HPET-MSI";
+
+ d = msi_create_irq_domain(fn, domain_info, parent);
+ if (!d) {
+ irq_domain_free_fwnode(fn);
+ kfree(domain_info);
+ }
+ return d;
+}
+
+static inline int hpet_dev_id(struct irq_domain *domain)
+{
+ struct msi_domain_info *info = msi_get_domain_info(domain);
+
+ return (int)(long)info->data;
+}
+
+static int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
+ int dev_num)
+{
+ struct irq_alloc_info info;
+
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_HPET;
+ info.data = hc;
+ info.devid = hpet_dev_id(domain);
+ info.hwirq = dev_num;
+
+ return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
+}
+
static int hpet_clkevt_msi_resume(struct clock_event_device *evt)
{
struct hpet_channel *hc = clockevent_to_channel(evt);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7f57ede3cb8e..5e78e01ca3b4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -740,6 +740,11 @@ static void __init kvm_apic_init(void)
#endif
}

+static bool __init kvm_msi_ext_dest_id(void)
+{
+ return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
+}
+
static void __init kvm_init_platform(void)
{
kvmclock_init();
@@ -769,6 +774,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.type = X86_HYPER_KVM,
.init.guest_late_init = kvm_guest_init,
.init.x2apic_available = kvm_para_available,
+ .init.msi_ext_dest_id = kvm_msi_ext_dest_id,
.init.init_platform = kvm_init_platform,
#if defined(CONFIG_AMD_MEM_ENCRYPT)
.runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index de776b2e6046..d133d6580f41 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -747,13 +747,14 @@ static void __init smp_quirk_init_udelay(void)
int
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
+ u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
unsigned long send_status, accept_status = 0;
int maxlvt;

/* Target chip */
/* Boot on the stack */
/* Kick the second */
- apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);
+ apic_icr_write(APIC_DM_NMI | dm, apicid);

pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -980,10 +981,7 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
if (!boot_error) {
enable_start_cpu0 = 1;
*cpu0_nmi_registered = 1;
- if (apic->dest_logical == APIC_DEST_LOGICAL)
- id = cpu0_logical_apicid;
- else
- id = apicid;
+ id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
}

diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a3038d8deb6a..8b395821cb8d 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -110,6 +110,7 @@ struct x86_init_ops x86_init __initdata = {
.init_platform = x86_init_noop,
.guest_late_init = x86_init_noop,
.x2apic_available = bool_x86_init_noop,
+ .msi_ext_dest_id = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop,
.init_after_bootmem = x86_init_noop,
},
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 4aa1c2e00e2a..8a4de3f12820 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -16,8 +16,6 @@

#include <trace/events/kvm.h>

-#include <asm/msidef.h>
-
#include "irq.h"

#include "ioapic.h"
@@ -104,22 +102,19 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
{
- trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
- (u64)e->msi.address_hi << 32 : 0),
- e->msi.data);
-
- irq->dest_id = (e->msi.address_lo &
- MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
- if (kvm->arch.x2apic_format)
- irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
- irq->vector = (e->msi.data &
- MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
- irq->dest_mode = kvm_lapic_irq_dest_mode(
- !!((1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo));
- irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
- irq->delivery_mode = e->msi.data & 0x700;
- irq->msi_redir_hint = ((e->msi.address_lo
- & MSI_ADDR_REDIRECTION_LOWPRI) > 0);
+ struct msi_msg msg = { .address_lo = e->msi.address_lo,
+ .address_hi = e->msi.address_hi,
+ .data = e->msi.data };
+
+ trace_kvm_msi_set_irq(msg.address_lo | (kvm->arch.x2apic_format ?
+ (u64)msg.address_hi << 32 : 0), msg.data);
+
+ irq->dest_id = x86_msi_msg_get_destid(&msg, kvm->arch.x2apic_format);
+ irq->vector = msg.arch_data.vector;
+ irq->dest_mode = kvm_lapic_irq_dest_mode(msg.arch_addr_lo.dest_mode_logical);
+ irq->trig_mode = msg.arch_data.is_level;
+ irq->delivery_mode = msg.arch_data.delivery_mode << 8;
+ irq->msi_redir_hint = msg.arch_addr_lo.redirect_hint;
irq->level = 1;
irq->shorthand = APIC_DEST_NOSHORT;
}
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 24ca4ee2802f..95e2e6bd8d8c 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,7 +215,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
static int intel_mid_pci_irq_enable(struct pci_dev *dev)
{
struct irq_alloc_info info;
- int polarity;
+ bool polarity_low;
int ret;
u8 gsi;

@@ -230,7 +230,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)

switch (intel_mid_identify_cpu()) {
case INTEL_MID_CPU_CHIP_TANGIER:
- polarity = IOAPIC_POL_HIGH;
+ polarity_low = false;

/* Special treatment for IRQ0 */
if (gsi == 0) {
@@ -252,11 +252,11 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
}
break;
default:
- polarity = IOAPIC_POL_LOW;
+ polarity_low = true;
break;
}

- ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity);
+ ioapic_set_alloc_attr(&info, dev_to_node(&dev->dev), 1, polarity_low);

/*
* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c552cd2d0632..3d41a09c2c14 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -152,7 +152,6 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi,

#if defined(CONFIG_PCI_MSI)
#include <linux/msi.h>
-#include <asm/msidef.h>

struct xen_pci_frontend_ops *xen_pci_frontend;
EXPORT_SYMBOL_GPL(xen_pci_frontend);
@@ -210,23 +209,20 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return ret;
}

-#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \
- MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
-
static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
struct msi_msg *msg)
{
- /* We set vector == 0 to tell the hypervisor we don't care about it,
- * but we want a pirq setup instead.
- * We use the dest_id field to pass the pirq that we want. */
- msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- MSI_ADDR_DEST_MODE_PHYSICAL |
- MSI_ADDR_REDIRECTION_CPU |
- MSI_ADDR_DEST_ID(pirq);
-
- msg->data = XEN_PIRQ_MSI_DATA;
+ /*
+ * We set vector == 0 to tell the hypervisor we don't care about
+ * it, but we want a pirq setup instead. We use the dest_id fields
+ * to pass the pirq that we want.
+ */
+ memset(msg, 0, sizeof(*msg));
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+ msg->arch_addr_hi.destid_8_31 = pirq >> 8;
+ msg->arch_addr_lo.destid_0_7 = pirq & 0xFF;
+ msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_EXTINT;
}

static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 18ca2261cc9a..1a536a187d74 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -35,8 +35,8 @@ static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info)
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
+ entry->delivery_mode = apic->delivery_mode;
+ entry->dest_mode = apic->dest_mode_logical;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index e82fd1910dae..0d46cc283cf5 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -148,15 +148,12 @@ static struct apic xen_pv_apic = {
.apic_id_valid = xen_id_always_valid,
.apic_id_registered = xen_id_always_registered,

- /* .irq_delivery_mode - used in native_compose_msi_msg only */
- /* .irq_dest_mode - used in native_compose_msi_msg only */
+ /* .delivery_mode and .dest_mode_logical not used by XENPV */

.disable_esr = 0,
- /* .dest_logical - default_send_IPI_ use it but we use our own. */
- .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */

+ .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */
.init_apic_ldr = xen_noop, /* setup_local_APIC calls it */
-
.ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */
.setup_apic_routing = NULL,
.cpu_present_to_apicid = xen_cpu_present_to_apicid,
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index f696ac7c5f89..ba74a722a400 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -893,7 +893,7 @@ struct amd_ir_data {
};

struct amd_irte_ops {
- void (*prepare)(void *, u32, u32, u8, u32, int);
+ void (*prepare)(void *, u32, bool, u8, u32, int);
void (*activate)(void *, u16, u16);
void (*deactivate)(void *, u16, u16);
void (*set_affinity)(void *, u16, u16, u8, u32);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 82e4af8f09bb..07d1f9913ce4 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -16,6 +16,7 @@
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
+#include <linux/irq.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
@@ -23,7 +24,6 @@
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
-#include <asm/msidef.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
@@ -1558,14 +1558,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
break;
}

- /*
- * Note: Since iommu_update_intcapxt() leverages
- * the IOMMU MMIO access to MSI capability block registers
- * for MSI address lo/hi/data, we need to check both
- * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support.
- */
- if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) &&
- (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT)))
+ if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
break;
default:
@@ -1602,9 +1595,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (ret)
return ret;

- ret = amd_iommu_create_irq_domain(iommu);
- if (ret)
- return ret;
+ if (amd_iommu_irq_remap) {
+ ret = amd_iommu_create_irq_domain(iommu);
+ if (ret)
+ return ret;
+ }

/*
* Make sure IOMMU is not considered to translate itself. The IVRS
@@ -1966,98 +1961,190 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
return 0;
}

-#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2)
-#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8)
-#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32)
-#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56)
+union intcapxt {
+ u64 capxt;
+ struct {
+ u64 reserved_0 : 2,
+ dest_mode_logical : 1,
+ reserved_1 : 5,
+ destid_0_23 : 24,
+ vector : 8,
+ reserved_2 : 16,
+ destid_24_31 : 8;
+ };
+} __attribute__ ((packed));

/*
- * Setup the IntCapXT registers with interrupt routing information
- * based on the PCI MSI capability block registers, accessed via
- * MMIO MSI address low/hi and MSI data registers.
+ * There isn't really any need to mask/unmask at the irqchip level because
+ * the 64-bit INTCAPXT registers can be updated atomically without tearing
+ * when the affinity is being updated.
*/
-static void iommu_update_intcapxt(struct amd_iommu *iommu)
+static void intcapxt_unmask_irq(struct irq_data *data)
{
- u64 val;
- u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
- u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
- u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
- bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
- u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
+}
+
+static void intcapxt_mask_irq(struct irq_data *data)
+{
+}

- if (x2apic_enabled())
- dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
+static struct irq_chip intcapxt_controller;

- val = XT_INT_VEC(data & 0xFF) |
- XT_INT_DEST_MODE(dm) |
- XT_INT_DEST_LO(dest) |
- XT_INT_DEST_HI(dest);
+static int intcapxt_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irqd, bool reserve)
+{
+ struct amd_iommu *iommu = irqd->chip_data;
+ struct irq_cfg *cfg = irqd_cfg(irqd);
+ union intcapxt xt;
+
+ xt.capxt = 0ULL;
+ xt.dest_mode_logical = apic->dest_mode_logical;
+ xt.vector = cfg->vector;
+ xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
+ xt.destid_24_31 = cfg->dest_apicid >> 24;

/**
* Current IOMMU implemtation uses the same IRQ for all
* 3 IOMMU interrupts.
*/
- writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
- writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
- writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ return 0;
}

-static void _irq_notifier_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
+static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
+ struct irq_data *irqd)
{
- struct amd_iommu *iommu;
+ intcapxt_mask_irq(irqd);
+}

- for_each_iommu(iommu) {
- if (iommu->dev->irq == notify->irq) {
- iommu_update_intcapxt(iommu);
- break;
- }
+
+static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ int i, ret;
+
+ if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
+ return -EINVAL;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
+
+ for (i = virq; i < virq + nr_irqs; i++) {
+ struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
+
+ irqd->chip = &intcapxt_controller;
+ irqd->chip_data = info->data;
+ __irq_set_handler(i, handle_edge_irq, 0, "edge");
}
+
+ return ret;
}

-static void _irq_notifier_release(struct kref *ref)
+static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
}

-static int iommu_init_intcapxt(struct amd_iommu *iommu)
+static int intcapxt_set_affinity(struct irq_data *irqd,
+ const struct cpumask *mask, bool force)
{
+ struct irq_data *parent = irqd->parent_data;
int ret;
- struct irq_affinity_notify *notify = &iommu->intcapxt_notify;

- /**
- * IntCapXT requires XTSup=1 and MsiCapMmioSup=1,
- * which can be inferred from amd_iommu_xt_mode.
- */
- if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
- return 0;
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;

- /**
- * Also, we need to setup notifier to update the IntCapXT registers
- * whenever the irq affinity is changed from user-space.
- */
- notify->irq = iommu->dev->irq;
- notify->notify = _irq_notifier_notify,
- notify->release = _irq_notifier_release,
- ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+ return intcapxt_irqdomain_activate(irqd->domain, irqd, false);
+}
+
+static struct irq_chip intcapxt_controller = {
+ .name = "IOMMU-MSI",
+ .irq_unmask = intcapxt_unmask_irq,
+ .irq_mask = intcapxt_mask_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_affinity = intcapxt_set_affinity,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+static const struct irq_domain_ops intcapxt_domain_ops = {
+ .alloc = intcapxt_irqdomain_alloc,
+ .free = intcapxt_irqdomain_free,
+ .activate = intcapxt_irqdomain_activate,
+ .deactivate = intcapxt_irqdomain_deactivate,
+};
+
+
+static struct irq_domain *iommu_irqdomain;
+
+static struct irq_domain *iommu_get_irqdomain(void)
+{
+ struct fwnode_handle *fn;
+
+ /* No need for locking here (yet) as the init is single-threaded */
+ if (iommu_irqdomain)
+ return iommu_irqdomain;
+
+ fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
+ if (!fn)
+ return NULL;
+
+ iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
+ fn, &intcapxt_domain_ops,
+ NULL);
+ if (!iommu_irqdomain)
+ irq_domain_free_fwnode(fn);
+
+ return iommu_irqdomain;
+}
+
+static int iommu_setup_intcapxt(struct amd_iommu *iommu)
+{
+ struct irq_domain *domain;
+ struct irq_alloc_info info;
+ int irq, ret;
+
+ domain = iommu_get_irqdomain();
+ if (!domain)
+ return -ENXIO;
+
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
+ info.data = iommu;
+
+ irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
+ if (irq < 0) {
+ irq_domain_remove(domain);
+ return irq;
+ }
+
+ ret = request_threaded_irq(irq, amd_iommu_int_handler,
+ amd_iommu_int_thread, 0, "AMD-Vi", iommu);
if (ret) {
- pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
- iommu->devid, iommu->dev->irq);
+ irq_domain_free_irqs(irq, 1);
+ irq_domain_remove(domain);
return ret;
}

- iommu_update_intcapxt(iommu);
iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
- return ret;
+ return 0;
}

-static int iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_irq(struct amd_iommu *iommu)
{
int ret;

if (iommu->int_enabled)
goto enable_faults;

- if (iommu->dev->msi_cap)
+ if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
+ ret = iommu_setup_intcapxt(iommu);
+ else if (iommu->dev->msi_cap)
ret = iommu_setup_msi(iommu);
else
ret = -ENODEV;
@@ -2066,10 +2153,6 @@ static int iommu_init_msi(struct amd_iommu *iommu)
return ret;

enable_faults:
- ret = iommu_init_intcapxt(iommu);
- if (ret)
- return ret;
-
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);

if (iommu->ppr_log != NULL)
@@ -2692,7 +2775,7 @@ static int amd_iommu_enable_interrupts(void)
int ret = 0;

for_each_iommu(iommu) {
- ret = iommu_init_msi(iommu);
+ ret = iommu_init_irq(iommu);
if (ret)
goto out;
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b9cf59443843..463d322a4f3b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -35,7 +35,6 @@
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/hw_irq.h>
-#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -3466,7 +3465,7 @@ static void free_irte(u16 devid, int index)
}

static void irte_prepare(void *entry,
- u32 delivery_mode, u32 dest_mode,
+ u32 delivery_mode, bool dest_mode,
u8 vector, u32 dest_apicid, int devid)
{
union irte *irte = (union irte *) entry;
@@ -3480,7 +3479,7 @@ static void irte_prepare(void *entry,
}

static void irte_ga_prepare(void *entry,
- u32 delivery_mode, u32 dest_mode,
+ u32 delivery_mode, bool dest_mode,
u8 vector, u32 dest_apicid, int devid)
{
struct irte_ga *irte = (struct irte_ga *) entry;
@@ -3602,10 +3601,8 @@ static int get_devid(struct irq_alloc_info *info)
{
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
return get_ioapic_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_HPET:
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
return get_hpet_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
@@ -3616,54 +3613,28 @@ static int get_devid(struct irq_alloc_info *info)
}
}

-static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info,
- int devid)
-{
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
- if (!iommu)
- return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
- return iommu->ir_domain;
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-}
-
-static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
-{
- int devid;
-
- if (!info)
- return NULL;
-
- devid = get_devid(info);
- if (devid < 0)
- return NULL;
- return get_irq_domain_for_devid(info, devid);
-}
-
struct irq_remap_ops amd_iommu_irq_ops = {
.prepare = amd_iommu_prepare,
.enable = amd_iommu_enable,
.disable = amd_iommu_disable,
.reenable = amd_iommu_reenable,
.enable_faulting = amd_iommu_enable_faulting,
- .get_irq_domain = get_irq_domain,
};

+static void fill_msi_msg(struct msi_msg *msg, u32 index)
+{
+ msg->data = index;
+ msg->address_lo = 0;
+ msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+}
+
static void irq_remapping_prepare_irte(struct amd_ir_data *data,
struct irq_cfg *irq_cfg,
struct irq_alloc_info *info,
int devid, int index, int sub_handle)
{
struct irq_2_irte *irte_info = &data->irq_2_irte;
- struct msi_msg *msg = &data->msi_entry;
- struct IO_APIC_route_entry *entry;
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];

if (!iommu)
@@ -3671,31 +3642,16 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data,

data->irq_2_irte.devid = devid;
data->irq_2_irte.index = index + sub_handle;
- iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode,
- apic->irq_dest_mode, irq_cfg->vector,
+ iommu->irte_ops->prepare(data->entry, apic->delivery_mode,
+ apic->dest_mode_logical, irq_cfg->vector,
irq_cfg->dest_apicid, devid);

switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
- /* Setup IOAPIC entry */
- entry = info->ioapic.entry;
- info->ioapic.entry = NULL;
- memset(entry, 0, sizeof(*entry));
- entry->vector = index;
- entry->mask = 0;
- entry->trigger = info->ioapic.trigger;
- entry->polarity = info->ioapic.polarity;
- /* Mask level triggered irqs. */
- if (info->ioapic.trigger)
- entry->mask = 1;
- break;
-
case X86_IRQ_ALLOC_TYPE_HPET:
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo = MSI_ADDR_BASE_LO;
- msg->data = irte_info->index;
+ fill_msi_msg(&data->msi_entry, irte_info->index);
break;

default:
@@ -3892,7 +3848,26 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
irte_info->index);
}

+static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ struct amd_iommu *iommu;
+ int devid = -1;
+
+ if (x86_fwspec_is_ioapic(fwspec))
+ devid = get_ioapic_devid(fwspec->param[0]);
+ else if (x86_fwspec_is_hpet(fwspec))
+ devid = get_hpet_devid(fwspec->param[0]);
+
+ if (devid < 0)
+ return 0;
+
+ iommu = amd_iommu_rlookup_table[devid];
+ return iommu && iommu->ir_domain == d;
+}
+
static const struct irq_domain_ops amd_ir_domain_ops = {
+ .select = irq_remapping_select,
.alloc = irq_remapping_alloc,
.free = irq_remapping_free,
.activate = irq_remapping_activate,
@@ -3943,8 +3918,8 @@ int amd_iommu_deactivate_guest_mode(void *data)
entry->hi.val = 0;

entry->lo.fields_remap.valid = valid;
- entry->lo.fields_remap.dm = apic->irq_dest_mode;
- entry->lo.fields_remap.int_type = apic->irq_delivery_mode;
+ entry->lo.fields_remap.dm = apic->dest_mode_logical;
+ entry->lo.fields_remap.int_type = apic->delivery_mode;
entry->hi.fields.vector = cfg->vector;
entry->lo.fields_remap.destination =
APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index e09e2d734c57..1d21a0b5f724 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -40,7 +40,6 @@ static int hyperv_ir_set_affinity(struct irq_data *data,
{
struct irq_data *parent = data->parent_data;
struct irq_cfg *cfg = irqd_cfg(data);
- struct IO_APIC_route_entry *entry;
int ret;

/* Return error If new irq affinity is out of ioapic_max_cpumask. */
@@ -51,9 +50,6 @@ static int hyperv_ir_set_affinity(struct irq_data *data,
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;

- entry = data->chip_data;
- entry->dest = cfg->dest_apicid;
- entry->vector = cfg->vector;
send_cleanup_vector(cfg);

return 0;
@@ -89,20 +85,6 @@ static int hyperv_irq_remapping_alloc(struct irq_domain *domain,

irq_data->chip = &hyperv_ir_chip;

- /*
- * If there is interrupt remapping function of IOMMU, setting irq
- * affinity only needs to change IRTE of IOMMU. But Hyper-V doesn't
- * support interrupt remapping function, setting irq affinity of IO-APIC
- * interrupts still needs to change IO-APIC registers. But ioapic_
- * configure_entry() will ignore value of cfg->vector and cfg->
- * dest_apicid when IO-APIC's parent irq domain is not the vector
- * domain.(See ioapic_configure_entry()) In order to setting vector
- * and dest_apicid to IO-APIC register, IO-APIC entry pointer is saved
- * in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_
- * affinity() set vector and dest_apicid directly into IO-APIC entry.
- */
- irq_data->chip_data = info->ioapic.entry;
-
/*
* Hypver-V IO APIC irq affinity should be in the scope of
* ioapic_max_cpumask because no irq remapping support.
@@ -119,22 +101,18 @@ static void hyperv_irq_remapping_free(struct irq_domain *domain,
irq_domain_free_irqs_common(domain, virq, nr_irqs);
}

-static int hyperv_irq_remapping_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool reserve)
+static int hyperv_irq_remapping_select(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
{
- struct irq_cfg *cfg = irqd_cfg(irq_data);
- struct IO_APIC_route_entry *entry = irq_data->chip_data;
-
- entry->dest = cfg->dest_apicid;
- entry->vector = cfg->vector;
-
- return 0;
+ /* Claim the only I/O APIC emulated by Hyper-V */
+ return x86_fwspec_is_ioapic(fwspec);
}

static const struct irq_domain_ops hyperv_ir_domain_ops = {
+ .select = hyperv_irq_remapping_select,
.alloc = hyperv_irq_remapping_alloc,
.free = hyperv_irq_remapping_free,
- .activate = hyperv_irq_remapping_activate,
};

static int __init hyperv_prepare_irq_remapping(void)
@@ -143,6 +121,7 @@ static int __init hyperv_prepare_irq_remapping(void)
int i;

if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) ||
+ x86_init.hyper.msi_ext_dest_id() ||
!x2apic_supported())
return -ENODEV;

@@ -182,18 +161,9 @@ static int __init hyperv_enable_irq_remapping(void)
return IRQ_REMAP_X2APIC_MODE;
}

-static struct irq_domain *hyperv_get_irq_domain(struct irq_alloc_info *info)
-{
- if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT)
- return ioapic_ir_domain;
- else
- return NULL;
-}
-
struct irq_remap_ops hyperv_irq_remap_ops = {
.prepare = hyperv_prepare_irq_remapping,
.enable = hyperv_enable_irq_remapping,
- .get_irq_domain = hyperv_get_irq_domain,
};

#endif
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 0cfce1d3b7bb..aeffda92b10b 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -20,7 +20,6 @@
#include <asm/cpu.h>
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
-#include <asm/msidef.h>

#include "../irq_remapping.h"

@@ -204,13 +203,13 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
return rc;
}

-static struct irq_domain *map_hpet_to_ir(u8 hpet_id)
+static struct intel_iommu *map_hpet_to_iommu(u8 hpet_id)
{
int i;

for (i = 0; i < MAX_HPET_TBS; i++) {
if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu)
- return ir_hpet[i].iommu->ir_domain;
+ return ir_hpet[i].iommu;
}
return NULL;
}
@@ -226,13 +225,6 @@ static struct intel_iommu *map_ioapic_to_iommu(int apic)
return NULL;
}

-static struct irq_domain *map_ioapic_to_ir(int apic)
-{
- struct intel_iommu *iommu = map_ioapic_to_iommu(apic);
-
- return iommu ? iommu->ir_domain : NULL;
-}
-
static struct irq_domain *map_dev_to_ir(struct pci_dev *dev)
{
struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev);
@@ -1113,7 +1105,7 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
memset(irte, 0, sizeof(*irte));

irte->present = 1;
- irte->dst_mode = apic->irq_dest_mode;
+ irte->dst_mode = apic->dest_mode_logical;
/*
* Trigger mode in the IRTE will always be edge, and for IO-APIC, the
* actual level or edge trigger will be setup in the IO-APIC
@@ -1122,35 +1114,18 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
* irq migration in the presence of interrupt-remapping.
*/
irte->trigger_mode = 0;
- irte->dlvry_mode = apic->irq_delivery_mode;
+ irte->dlvry_mode = apic->delivery_mode;
irte->vector = vector;
irte->dest_id = IRTE_DEST(dest);
irte->redir_hint = 1;
}

-static struct irq_domain *intel_get_irq_domain(struct irq_alloc_info *info)
-{
- if (!info)
- return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
- return map_ioapic_to_ir(info->devid);
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
- return map_hpet_to_ir(info->devid);
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-}
-
struct irq_remap_ops intel_irq_remap_ops = {
.prepare = intel_prepare_irq_remapping,
.enable = intel_enable_irq_remapping,
.disable = disable_irq_remapping,
.reenable = reenable_irq_remapping,
.enable_faulting = enable_drhd_fault_handling,
- .get_irq_domain = intel_get_irq_domain,
};

static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
@@ -1260,16 +1235,30 @@ static struct irq_chip intel_ir_chip = {
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
};

+static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle)
+{
+ memset(msg, 0, sizeof(*msg));
+
+ msg->arch_addr_lo.dmar_base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->arch_addr_lo.dmar_subhandle_valid = true;
+ msg->arch_addr_lo.dmar_format = true;
+ msg->arch_addr_lo.dmar_index_0_14 = index & 0x7FFF;
+ msg->arch_addr_lo.dmar_index_15 = !!(index & 0x8000);
+
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+
+ msg->arch_data.dmar_subhandle = subhandle;
+}
+
static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
struct irq_cfg *irq_cfg,
struct irq_alloc_info *info,
int index, int sub_handle)
{
- struct IR_IO_APIC_route_entry *entry;
struct irte *irte = &data->irte_entry;
- struct msi_msg *msg = &data->msi_entry;

prepare_irte(irte, irq_cfg->vector, irq_cfg->dest_apicid);
+
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
/* Set source-id of interrupt request */
@@ -1280,46 +1269,20 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
irte->trigger_mode, irte->dlvry_mode,
irte->avail, irte->vector, irte->dest_id,
irte->sid, irte->sq, irte->svt);
-
- entry = (struct IR_IO_APIC_route_entry *)info->ioapic.entry;
- info->ioapic.entry = NULL;
- memset(entry, 0, sizeof(*entry));
- entry->index2 = (index >> 15) & 0x1;
- entry->zero = 0;
- entry->format = 1;
- entry->index = (index & 0x7fff);
- /*
- * IO-APIC RTE will be configured with virtual vector.
- * irq handler will do the explicit EOI to the io-apic.
- */
- entry->vector = info->ioapic.pin;
- entry->mask = 0; /* enable IRQ */
- entry->trigger = info->ioapic.trigger;
- entry->polarity = info->ioapic.polarity;
- if (info->ioapic.trigger)
- entry->mask = 1; /* Mask level triggered irqs. */
+ sub_handle = info->ioapic.pin;
break;
-
case X86_IRQ_ALLOC_TYPE_HPET:
+ set_hpet_sid(irte, info->devid);
+ break;
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
- if (info->type == X86_IRQ_ALLOC_TYPE_HPET)
- set_hpet_sid(irte, info->devid);
- else
- set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
-
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->data = sub_handle;
- msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
- MSI_ADDR_IR_SHV |
- MSI_ADDR_IR_INDEX1(index) |
- MSI_ADDR_IR_INDEX2(index);
+ set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
break;
-
default:
BUG_ON(1);
break;
}
+ fill_msi_msg(&data->msi_entry, index, sub_handle);
}

static void intel_free_irq_resources(struct irq_domain *domain,
@@ -1444,7 +1407,22 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
modify_irte(&data->irq_2_iommu, &entry);
}

+static int intel_irq_remapping_select(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ struct intel_iommu *iommu = NULL;
+
+ if (x86_fwspec_is_ioapic(fwspec))
+ iommu = map_ioapic_to_iommu(fwspec->param[0]);
+ else if (x86_fwspec_is_hpet(fwspec))
+ iommu = map_hpet_to_iommu(fwspec->param[0]);
+
+ return iommu && d == iommu->ir_domain;
+}
+
static const struct irq_domain_ops intel_ir_domain_ops = {
+ .select = intel_irq_remapping_select,
.alloc = intel_irq_remapping_alloc,
.free = intel_irq_remapping_free,
.activate = intel_irq_remapping_activate,
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2d84b1ed205e..83314b9d8f38 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -158,17 +158,3 @@ void panic_if_irq_remap(const char *msg)
if (irq_remapping_enabled)
panic(msg);
}
-
-/**
- * irq_remapping_get_irq_domain - Get the irqdomain serving the request @info
- * @info: interrupt allocation information, used to identify the IOMMU device
- *
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *irq_remapping_get_irq_domain(struct irq_alloc_info *info)
-{
- if (!remap_ops || !remap_ops->get_irq_domain)
- return NULL;
-
- return remap_ops->get_irq_domain(info);
-}
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 1661b3d75920..8c89cb947cdb 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -42,9 +42,6 @@ struct irq_remap_ops {

/* Enable fault handling */
int (*enable_faulting)(void);
-
- /* Get the irqdomain associated to IOMMU device */
- struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
};

extern struct irq_remap_ops intel_irq_remap_ops;
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 03ed5cb1c4b2..6db8d96a78eb 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1226,7 +1226,7 @@ static void hv_irq_unmask(struct irq_data *data)
params->int_target.vector = cfg->vector;

/*
- * Honoring apic->irq_delivery_mode set to dest_Fixed by
+ * Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED by
* setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
* spurious interrupt storm. Not doing so does not seem to have a
* negative effect (yet?).
@@ -1324,7 +1324,7 @@ static u32 hv_compose_msi_req_v1(
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
- int_pkt->int_desc.delivery_mode = dest_Fixed;
+ int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;

/*
* Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
@@ -1345,7 +1345,7 @@ static u32 hv_compose_msi_req_v2(
int_pkt->wslot.slot = slot;
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
- int_pkt->int_desc.delivery_mode = dest_Fixed;
+ int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;

/*
* Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index f375c21ceeb1..6f8795454e5a 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -18,7 +18,6 @@
#include <asm/irqdomain.h>
#include <asm/device.h>
#include <asm/msi.h>
-#include <asm/msidef.h>

#define VMD_CFGBAR 0
#define VMD_MEMBAR1 2
@@ -131,10 +130,10 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct vmd_irq_list *irq = vmdirq->irq;
struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);

- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo = MSI_ADDR_BASE_LO |
- MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
- msg->data = 0;
+ memset(msg, 0, sizeof(*msg));
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+ msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->arch_addr_lo.destid_0_7 = index_from_irqs(vmd, irq);
}

/*
diff --git a/include/asm-generic/msi.h b/include/asm-generic/msi.h
index e6795f088bdd..25344de0e8f9 100644
--- a/include/asm-generic/msi.h
+++ b/include/asm-generic/msi.h
@@ -4,6 +4,8 @@

#include <linux/types.h>

+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+
#ifndef NUM_MSI_ALLOC_SCRATCHPAD_REGS
# define NUM_MSI_ALLOC_SCRATCHPAD_REGS 2
#endif
@@ -30,4 +32,6 @@ typedef struct msi_alloc_info {

#define GENERIC_MSI_DOMAIN_OPS 1

+#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
+
#endif
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6b584cc4757c..360a0a7e7341 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -4,11 +4,50 @@

#include <linux/kobject.h>
#include <linux/list.h>
+#include <asm/msi.h>
+
+/* Dummy shadow structures if an architecture does not define them */
+#ifndef arch_msi_msg_addr_lo
+typedef struct arch_msi_msg_addr_lo {
+ u32 address_lo;
+} __attribute__ ((packed)) arch_msi_msg_addr_lo_t;
+#endif
+
+#ifndef arch_msi_msg_addr_hi
+typedef struct arch_msi_msg_addr_hi {
+ u32 address_hi;
+} __attribute__ ((packed)) arch_msi_msg_addr_hi_t;
+#endif
+
+#ifndef arch_msi_msg_data
+typedef struct arch_msi_msg_data {
+ u32 data;
+} __attribute__ ((packed)) arch_msi_msg_data_t;
+#endif

+/**
+ * msi_msg - Representation of a MSI message
+ * @address_lo: Low 32 bits of msi message address
+ * @arch_addrlo: Architecture specific shadow of @address_lo
+ * @address_hi: High 32 bits of msi message address
+ * (only used when device supports it)
+ * @arch_addrhi: Architecture specific shadow of @address_hi
+ * @data: MSI message data (usually 16 bits)
+ * @arch_data: Architecture specific shadow of @data
+ */
struct msi_msg {
- u32 address_lo; /* low 32 bits of msi message address */
- u32 address_hi; /* high 32 bits of msi message address */
- u32 data; /* 16 bits of msi message data */
+ union {
+ u32 address_lo;
+ arch_msi_msg_addr_lo_t arch_addr_lo;
+ };
+ union {
+ u32 address_hi;
+ arch_msi_msg_addr_hi_t arch_addr_hi;
+ };
+ union {
+ u32 data;
+ arch_msi_msg_data_t arch_data;
+ };
};

extern int pci_msi_ignore_mask;
@@ -243,7 +282,6 @@ struct msi_controller {
#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN

#include <linux/irqhandler.h>
-#include <asm/msi.h>

struct irq_domain;
struct irq_domain_ops;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cf8b374b892d..673fa64c1c44 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -42,7 +42,16 @@ static inline void debugfs_add_domain_dir(struct irq_domain *d) { }
static inline void debugfs_remove_domain_dir(struct irq_domain *d) { }
#endif

-const struct fwnode_operations irqchip_fwnode_ops;
+static const char *irqchip_fwnode_get_name(const struct fwnode_handle *fwnode)
+{
+ struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+
+ return fwid->name;
+}
+
+const struct fwnode_operations irqchip_fwnode_ops = {
+ .get_name = irqchip_fwnode_get_name,
+};
EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);

/**