Re: [PATCH -tip] x86: fix iommu=soft boot option

From: Yinghai Lu
Date: Wed Nov 25 2009 - 17:35:18 EST


FUJITA Tomonori wrote:
> On Wed, 25 Nov 2009 01:45:10 -0800
> Yinghai Lu <yinghai@xxxxxxxxxx> wrote:
>
>> Ingo Molnar wrote:
>>> * FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx> wrote:
>>>
>>>> On Wed, 25 Nov 2009 00:54:34 -0800
>>>> Yinghai Lu <yinghai@xxxxxxxxxx> wrote:
>>>>
>>>>> only remember that SLES 9 SP3 (?) at some point has problem with AMD
>>>>> 10h ( quad core version) when memory > 4 g (with USB controller ?)
>>>>> because the gart code only check K8 device id, and didn't check 10h
>>>>> device id. so gart iommu is not used. and happenly swiotlb code has
>>>>> problem with that kernel version.
>>>>>
>>>>> thinking we should keep old behavior, until some day we can remove
>>>>> them all.
>>>> Why? We are talking about changing 2.6.33 behavior. swiotlb in 2.6.33
>>>> should be the safe option.
>>> If that behavior was relied on for suspected (or real) bugs in the
>>> swiotlb code then i agree that we should do this change. (and fix any
>>> bugs if they still occur.)
>> after look at gart_iommu_hole_init() closely,
>>
>> should be
>>
>> for AMD 64bit, MEM > 4g, no AGP, iommu=soft
>> 1. if BIOS have correct gart setting, Kernel will use swiotlb
>> 2. if BIOS does not have correct gart setting, Kernel will use swiotlb
>>
>> and for the all the cases, the codes after that
>> /* Fix up the north bridges */
>> ...
>>
>> will disable the translation...
>
> What code are you talking about? GART translation is disabled at boot
> time (if not, we are dead because some GART hardware are broken so we
> need to fix things before enabling them).
>
>
>> so even swiotlb=soft is used, gart_iommu_hole_init() still need to
>> be called. to make sure aperture is disabled somehow.
>
> I don't think so.

in aperture_64.c::gart_iommu_hole_init()
printk(KERN_INFO "Checking aperture...\n");

if (!fallback_aper_force)
agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);

fix = 0;
node = 0;
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
int dev_base, dev_limit;

bus = bus_dev_ranges[i].bus;
dev_base = bus_dev_ranges[i].dev_base;
dev_limit = bus_dev_ranges[i].dev_limit;

for (slot = dev_base; slot < dev_limit; slot++) {

if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;

iommu_detected = 1;
gart_iommu_aperture = 1;

aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
aper_base <<= 25;

printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
node, aper_base, aper_size >> 20);
node++;

if (!aperture_valid(aper_base, aper_size, 64<<20)) {
if (valid_agp && agp_aper_base &&
agp_aper_base == aper_base &&
agp_aper_order == aper_order) {
/* the same between two setting from NB and agp */
if (!no_iommu &&
max_pfn > MAX_DMA32_PFN &&
!printed_gart_size_msg) {
printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
printk(KERN_ERR "please increase GART size in your BIOS setup\n");
printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
printed_gart_size_msg = 1;
}
} else {
POINT A:
fix = 1;
goto out;
}
}

if ((last_aper_order && aper_order != last_aper_order) ||
(last_aper_base && aper_base != last_aper_base)) {
fix = 1;
goto out;
}
last_aper_order = aper_order;
last_aper_base = aper_base;
}
}

out:
if (!fix && !fallback_aper_force) {
if (last_aper_base) {
unsigned long n = (32 * 1024 * 1024) << last_aper_order;

insert_aperture_resource((u32)last_aper_base, n);
}
return;
}

if (!fallback_aper_force) {
aper_alloc = agp_aper_base;
aper_order = agp_aper_order;
}

if (aper_alloc) {
/* Got the aperture from the AGP bridge */
} else if (swiotlb && !valid_agp) {
POINT: B
/* Do nothing */
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||
valid_agp ||
fallback_aper_force) {
POINT: C
printk(KERN_INFO
"Your BIOS doesn't leave a aperture memory hole\n");
printk(KERN_INFO
"Please enable the IOMMU option in the BIOS setup\n");
printk(KERN_INFO
"This costs you %d MB of RAM\n",
32 << fallback_aper_order);

aper_order = fallback_aper_order;
aper_alloc = allocate_aperture();
if (!aper_alloc) {
/*
* Could disable AGP and IOMMU here, but it's
* probably not worth it. But the later users
* cannot deal with bad apertures and turning
* on the aperture over memory causes very
* strange problems, so it's better to panic
* early.
*/
panic("Not enough memory for aperture");
}
} else {
return;
}

POINT X:

/* Fix up the north bridges */
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
int dev_base, dev_limit;

bus = bus_dev_ranges[i].bus;
dev_base = bus_dev_ranges[i].dev_base;
dev_limit = bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;

/* Don't enable translation yet. That is done later.
Assume this BIOS didn't initialise the GART so
just overwrite all previous bits */
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
}
}

when AMD64 mem > 4g, no AGP, BIOS set all gart iommu on all nodes size to 32M, and enable bit are set.
1. iommu=soft, will go through POINT A and POINT B
2. no "iommu=soft", will go through POINT A and POINT C
and all will reach POINT X to make sure ENABLE bit is not set.

please check

[PATCH] x86: fix gart iommu using for amd 64 bit system -v3

after
|commit 75f1cdf1dda92cae037ec848ae63690d91913eac
|Author: FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx>
|Date: Tue Nov 10 19:46:20 2009 +0900
|
| x86: Handle HW IOMMU initialization failure gracefully
|
| If HW IOMMU initialization fails (Intel VT-d often does this,
| typically due to BIOS bugs), we fall back to nommu. It doesn't
| work for the majority since nowadays we have more than 4GB
| memory so we must use swiotlb instead of nommu.
...

amd 64 systems that
1. do not have AGP
2. do not have IOMMU
3. mem > 4g
4. BIOS do not allocate correct gart in NB.
will leave them to use SWIOTLB forcely.

also in pci_iommu_alloc()
pci_swiotlb_init is stealing the preallocate range that is for
gart_iommu_hole workaround.

so restore the sequence...

will get back
[ 0.000000] Your BIOS doesn't leave a aperture memory hole
[ 0.000000] Please enable the IOMMU option in the BIOS setup
[ 0.000000] This costs you 64 MB of RAM
[ 0.000000] Mapping aperture over 65536 KB of RAM @ 20000000
...
[ 12.702822] calling pci_iommu_init+0x0/0x31 @ 1
[ 12.708728] PCI-DMA: Disabling AGP.
[ 12.712812] PCI-DMA: aperture base @ 20000000 size 65536 KB
[ 12.718384] PCI-DMA: using GART IOMMU.
[ 12.722139] PCI-DMA: Reserving 64MB of IOMMU area in the AGP aperture
[ 12.736944] initcall pci_iommu_init+0x0/0x31 returned 0 after 28802 usecs

-v2: call shutdown when no agp is there
-v3: add check_store to restore state for swiotlb
separate pci_swiotlb_detect and pci_swiotlb_init from FUJITA

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
arch/x86/include/asm/swiotlb.h | 8 ++++++--
arch/x86/kernel/aperture_64.c | 17 ++++++++++++++---
arch/x86/kernel/pci-dma.c | 11 +++++++++--
arch/x86/kernel/pci-gart_64.c | 3 ++-
arch/x86/kernel/pci-swiotlb.c | 11 +++++++----
5 files changed, 38 insertions(+), 12 deletions(-)

Index: linux-2.6/arch/x86/kernel/aperture_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/aperture_64.c
+++ linux-2.6/arch/x86/kernel/aperture_64.c
@@ -375,6 +375,9 @@ void __init gart_iommu_hole_init(void)
u64 aper_base, last_aper_base = 0;
int fix, slot, valid_agp = 0;
int i, node;
+ int iommu_detected_old = iommu_detected;
+ int gart_iommu_aperture_old = gart_iommu_aperture;
+ int (*iommu_init_old)(void) = x86_init.iommu.iommu_init;

if (gart_iommu_aperture_disabled || !fix_aperture ||
!early_pci_allowed())
@@ -448,7 +451,7 @@ out:

insert_aperture_resource((u32)last_aper_base, n);
}
- return;
+ goto check_restore;
}

if (!fallback_aper_force) {
@@ -458,7 +461,7 @@ out:

if (aper_alloc) {
/* Got the aperture from the AGP bridge */
- } else if (!valid_agp) {
+ } else if (swiotlb && !valid_agp) {
/* Do nothing */
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||
@@ -486,7 +489,7 @@ out:
panic("Not enough memory for aperture");
}
} else {
- return;
+ goto check_restore;
}

/* Fix up the north bridges */
@@ -510,4 +513,12 @@ out:
}

set_up_gart_resume(aper_order, aper_alloc);
+
+check_restore:
+ if (swiotlb) {
+ /* restore the setting */
+ iommu_detected = iommu_detected_old;
+ gart_iommu_aperture = gart_iommu_aperture_old;
+ x86_init.iommu.iommu_init = iommu_init_old;
+ }
}
Index: linux-2.6/arch/x86/kernel/pci-dma.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/pci-dma.c
+++ linux-2.6/arch/x86/kernel/pci-dma.c
@@ -120,21 +120,28 @@ static void __init dma32_free_bootmem(vo

void __init pci_iommu_alloc(void)
{
+ int ret;
+
#ifdef CONFIG_X86_64
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
#endif
- if (pci_swiotlb_init())
- return;
+ ret = pci_swiotlb_detect();

gart_iommu_hole_init();

+ if (ret)
+ goto out;
+
detect_calgary();

detect_intel_iommu();

/* needs to be called after gart_iommu_hole_init */
amd_iommu_detect();
+
+out:
+ pci_swiotlb_init();
}

void *dma_generic_alloc_coherent(struct device *dev, size_t size,
Index: linux-2.6/arch/x86/kernel/pci-gart_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/pci-gart_64.c
+++ linux-2.6/arch/x86/kernel/pci-gart_64.c
@@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void)
struct pci_dev *dev;
int i;

- if (no_agp)
+ /* don't shutdown it if there is AGP installed */
+ if (!no_agp)
return;

for (i = 0; i < num_k8_northbridges; i++) {
Index: linux-2.6/arch/x86/include/asm/swiotlb.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/swiotlb.h
+++ linux-2.6/arch/x86/include/asm/swiotlb.h
@@ -5,13 +5,17 @@

#ifdef CONFIG_SWIOTLB
extern int swiotlb;
-extern int pci_swiotlb_init(void);
+int pci_swiotlb_detect(void);
+void pci_swiotlb_init(void);
#else
#define swiotlb 0
-static inline int pci_swiotlb_init(void)
+static inline int pci_swiotlb_detect(void)
{
return 0;
}
+static inline void pci_swiotlb_init(void)
+{
+}
#endif

static inline void dma_mark_clean(void *addr, size_t size) {}
Index: linux-2.6/arch/x86/kernel/pci-swiotlb.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/pci-swiotlb.c
+++ linux-2.6/arch/x86/kernel/pci-swiotlb.c
@@ -43,12 +43,12 @@ static struct dma_map_ops swiotlb_dma_op
};

/*
- * pci_swiotlb_init - initialize swiotlb if necessary
+ * pci_swiotlb_detect - initialize swiotlb if necessary
*
* This returns non-zero if we are forced to use swiotlb (by the boot
* option).
*/
-int __init pci_swiotlb_init(void)
+int __init pci_swiotlb_detect(void)
{
int use_swiotlb = swiotlb | swiotlb_force;

@@ -60,10 +60,13 @@ int __init pci_swiotlb_init(void)
if (swiotlb_force)
swiotlb = 1;

+ return use_swiotlb;
+}
+
+void __init pci_swiotlb_init(void)
+{
if (swiotlb) {
swiotlb_init(0);
dma_ops = &swiotlb_dma_ops;
}
-
- return use_swiotlb;
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/