Re: commit 7ffb791423c7 breaks steam game

From: Bert Karwatzki
Date: Wed Mar 26 2025 - 20:58:35 EST


Am Mittwoch, dem 26.03.2025 um 15:58 -0700 schrieb Linus Torvalds:
> On Wed, 26 Mar 2025 at 15:00, Bert Karwatzki <spasswolf@xxxxxx> wrote:
> >
> > As Balbir Singh found out this memory comes from amdkfd
> > (kgd2kfd_init_zone_device()) with CONFIG_HSA_AMD_SVM=y. The memory gets placed
> > by devm_request_free_mem_region() which places the memory at the end of the
> > physical address space (DIRECT_MAP_PHYSMEM_END). DIRECT_MAP_PHYSMEM_END changes
> > when using nokaslr and so the memory shifts.
>
> So I just want to say that having followed the thread as a spectator,
> big kudos to everybody involved in this thing. Particularly to you,
> Bart, for all your debugging and testing, and to Balbir for following
> up and figuring it out.
>
> Because this was a strange one.
>
> > One can work around this by removing the GFR_DESCENDING flag from
> > devm_request_free_mem_region() so the memory gets placed right after the other
> > resources:
>
> I worry that there might be other machines where that completely breaks things.
>
> There are various historical reasons why we look for addresses in high
> regions, ie on machines where there are various hidden IO regions that
> aren't enumerated by e280 and aren't found by our usual PCI BAR
> discovery because they are special hidden ones.
>
> So then users of [devm_]request_free_mem_region() might end up getting
> allocated a region that has some magic system resource in it.
>
> And no, this shouldn't happen on any normal machine, but it has
> definitely been a thing in the past.
>
> So I'm very happy that you guys figured out what ended up happening,
> but I'm not convinced that the devm_request_free_mem_region()
> workaround is tenable.
>
> So I think it needs to be more targeted to the HSA_AMD_SVM case than
> touch the devm_request_free_mem_region() logic for everybody.
>
> Linus

This patch adds another function devm_request_free_mem_region_from_end()
with an additional argument which allows to choose the end address from
which to place the resource.
The problem here is this uses dma_get_mask(adev->dev) as end address which
uses the dma mask for the discrete GPU while it should use the dma mask for
the built-in GPU (In my case both are equal (44bits), but I'm not sure if
this is always the case)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d05d199b5e44..e1942fef3637 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1042,7 +1042,8 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size -
1;
pgmap->type = MEMORY_DEVICE_COHERENT;
} else {
- res = devm_request_free_mem_region(adev->dev, &iomem_resource,
size);
+ res = devm_request_free_mem_region_from_end(adev->dev,
+ &iomem_resource, size, dma_get_mask(adev-
>dev));
if (IS_ERR(res))
return PTR_ERR(res);
pgmap->range.start = res->start;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5385349f0b8a..a9a765721ab4 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -407,6 +407,9 @@ walk_iomem_res_desc(unsigned long desc, unsigned long flags,
u64 start, u64 end,

struct resource *devm_request_free_mem_region(struct device *dev,
struct resource *base, unsigned long size);
+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+ struct resource *base, unsigned long size,
+ resource_size_t seek_end);
struct resource *request_free_mem_region(struct resource *base,
unsigned long size, const char *name);
struct resource *alloc_free_mem_region(struct resource *base,
diff --git a/kernel/resource.c b/kernel/resource.c
index 12004452d999..82f40407c02d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1875,12 +1875,14 @@ EXPORT_SYMBOL(resource_list_free);
#endif

static resource_size_t gfr_start(struct resource *base, resource_size_t size,
- resource_size_t align, unsigned long flags)
+ resource_size_t align, resource_size_t
seek_end,
+ unsigned long flags)
{
if (flags & GFR_DESCENDING) {
resource_size_t end;

end = min_t(resource_size_t, base->end,
DIRECT_MAP_PHYSMEM_END);
+ end = min_t(resource_size_t, end, seek_end);
return end - size + 1;
}

@@ -1920,8 +1922,8 @@ static void remove_free_mem_region(void *_res)
static struct resource *
get_free_mem_region(struct device *dev, struct resource *base,
resource_size_t size, const unsigned long align,
- const char *name, const unsigned long desc,
- const unsigned long flags)
+ resource_size_t seek_end, const char *name,
+ const unsigned long desc, const unsigned long flags)
{
resource_size_t addr;
struct resource *res;
@@ -1946,7 +1948,7 @@ get_free_mem_region(struct device *dev, struct resource
*base,
}

write_lock(&resource_lock);
- for (addr = gfr_start(base, size, align, flags);
+ for (addr = gfr_start(base, size, align, seek_end, flags);
gfr_continue(base, addr, align, flags);
addr = gfr_next(addr, align, flags)) {
if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE)
!=
@@ -2021,17 +2023,30 @@ struct resource *devm_request_free_mem_region(struct
device *dev,
unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
- dev_name(dev),
+ DIRECT_MAP_PHYSMEM_END, dev_name(dev),
IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
}
EXPORT_SYMBOL_GPL(devm_request_free_mem_region);

+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+ struct resource *base, unsigned long size,
+ resource_size_t seek_end)
+{
+ unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
+
+ return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
+ seek_end, dev_name(dev),
+ IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
+}
+EXPORT_SYMBOL_GPL(devm_request_free_mem_region_from_end);
+
struct resource *request_free_mem_region(struct resource *base,
unsigned long size, const char *name)
{
unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

- return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN, name,
+ return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN,
+ DIRECT_MAP_PHYSMEM_END, name,
IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
}
EXPORT_SYMBOL_GPL(request_free_mem_region);
@@ -2055,8 +2070,8 @@ struct resource *alloc_free_mem_region(struct resource
*base,
/* Default of ascending direction and insert resource */
unsigned long flags = 0;

- return get_free_mem_region(NULL, base, size, align, name,
- IORES_DESC_NONE, flags);
+ return get_free_mem_region(NULL, base, size, align,
DIRECT_MAP_PHYSMEM_END,
+ name, IORES_DESC_NONE, flags);
}
EXPORT_SYMBOL_GPL(alloc_free_mem_region);
#endif /* CONFIG_GET_FREE_REGION */



Bert Karwatzki