Re: [PATCH v13 01/16] PCI: Let pci_mmap_page_range() take resource address

From: Yinghai Lu
Date: Wed Jun 22 2016 - 15:25:04 EST


On Wed, Jun 22, 2016 at 8:22 AM, Bjorn Helgaas <helgaas@xxxxxxxxxx> wrote:
> On Tue, Jun 21, 2016 at 09:32:49PM -0700, Yinghai Lu wrote:
>
> If sparc is broken, let's make this a tiny sparc-only patch that fixes
> only the breakage -- no cleanup or restructuring. Then we can do the
> more extensive work in a separate patch.

ok. please check attached two patches that take position for
[PATCH v13 01/16]

>
> The example mmap() I keep asking for would be very helpful to me in
> understanding the problem. It would probably also help folks who
> maintain user programs that use mmap. They need to figure out whether
> they have code that worked most places but has always been broken on
> sparc, or code that depended on the previous sparc behavior and will
> be broken by this change, or what.

ok.

calling : ./test_mmap_proc /proc/bus/pci/0000:00/04.0 0x2000000

code : test_mmap_proc.c

#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8)
#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get
controller for PCI device. */
#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap
state to I/O space. */
#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap
state to MEM space. */
#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /*
Enable/disable write-combining. */

/* sparc64 */
#define PAGE_SHIFT 13
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))

int main(int argc, char *argv[])
{
int i;
int fd;
char *addr;
unsigned long offset = 0;
unsigned long left = 0;

fd = open(argv[1], O_RDONLY);
if (fd < 0) {
perror("open");
return 1;
}

if (argc > 2)
sscanf(argv[2], "0x%lx", &offset);

left = offset & (PAGE_SIZE - 1);
offset &= PAGE_MASK;

ioctl(fd, PCIIOC_MMAP_IS_MEM);
addr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, offset);
if (addr == MAP_FAILED) {
perror("mmap");
return 1;
}

printf("map finished\n");


for (i = 0; i < 8; i++)
printf("%x ", addr[i + left]);
printf("\n");

munmap(addr, PAGE_SIZE);

close(fd);

return 0;
}
From: Yinghai Lu <yinghai@xxxxxxxxxx>
Subject: [PATCH v13.update.part1 01/16] PCI: Fix proc mmap on sparc

In 8c05cd08a7 ("PCI: fix offset check for sysfs mmapped files"), try
to check exposed value with resource start/end in proc mmap path.

| start = vma->vm_pgoff;
| size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
| pci_start = (mmap_api == PCI_MMAP_PROCFS) ?
| pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0;
| if (start >= pci_start && start < pci_start + size &&
| start + nr <= pci_start + size)

vma->vm_pgoff is above code segment is user/BAR value >> PAGE_SHIFT.
pci_start is resource->start >> PAGE_SHIFT.

For sparc, resource start is different from BAR start aka pci bus address.
pci bus address need to add offset to be the resource start.

So that commit breaks all arch that exposed value is BAR/user value,
and need to be offseted to resource address.

test code using: ./test_mmap_proc /proc/bus/pci/0000:00/04.0 0x2000000
test code segment:
fd = open(argv[1], O_RDONLY);
...
sscanf(argv[2], "0x%lx", &offset);
left = offset & (PAGE_SIZE - 1);
offset &= PAGE_MASK;
ioctl(fd, PCIIOC_MMAP_IS_MEM);
addr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, offset);
for (i = 0; i < 8; i++)
printf("%x ", addr[i + left]);
munmap(addr, PAGE_SIZE);
close(fd);

Fixes: 8c05cd08a7 ("PCI: fix offset check for sysfs mmapped files")
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index bcd10c7..e907154 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -974,15 +974,20 @@ void pci_remove_legacy_files(struct pci_bus *b)
int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma,
enum pci_mmap_api mmap_api)
{
- unsigned long nr, start, size, pci_start;
+ unsigned long nr, start, size, pci_start = 0;

if (pci_resource_len(pdev, resno) == 0)
return 0;
nr = vma_pages(vma);
start = vma->vm_pgoff;
size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
- pci_start = (mmap_api == PCI_MMAP_PROCFS) ?
- pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0;
+ if (mmap_api == PCI_MMAP_PROCFS) {
+ resource_size_t user_start, user_end;
+
+ pci_resource_to_user(pdev, resno, &pdev->resource[resno],
+ &user_start, &user_end);
+ pci_start = user_start >> PAGE_SHIFT;
+ }
if (start >= pci_start && start < pci_start + size &&
start + nr <= pci_start + size)
return 1;
From: Yinghai Lu <yinghai@xxxxxxxxxx>
Subject: [PATCH v13.update.part2 01/16] PCI: Let pci_mmap_page_range() take resource address

Original pci_mmap_page_range() is taking PCI BAR value aka usr_address.

Bjorn found out that it would be much simple to pass resource address
directly and avoid extra those __pci_mmap_make_offset.

In this patch:
1. in proc path: proc_bus_pci_mmap, try convert back to resource
before calling pci_mmap_page_range
2. in sysfs path: pci_mmap_resource will just offset with resource start.
3. all pci_mmap_page_range will have vma->vm_pgoff with in resource
range instead of BAR value.
4. skip calling __pci_mmap_make_offset, as the checking is done
in pci_mmap_fits().

-v2: add pci_user_to_resource and remove __pci_mmap_make_offset
-v3: pass resource pointer with pci_mmap_page_range()
-v4: put __pci_mmap_make_offset() removing to following patch
seperate /sys io access alignment checking to another patch
updated after Bjorn's pci_resource_to_user() changes.
-v5: update after fix for pci_mmap with proc path accoring to
Bjorn.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Cc: sparclinux@xxxxxxxxxxxxxxx
Cc: linux-xtensa@xxxxxxxxxxxxxxxx

---
arch/microblaze/pci/pci-common.c | 11 ++++---
arch/powerpc/kernel/pci-common.c | 11 ++++---
arch/sparc/kernel/pci.c | 4 --
arch/xtensa/kernel/pci.c | 13 ++++++--
drivers/pci/pci-sysfs.c | 32 ++++++++++----------
drivers/pci/pci.h | 2 -
drivers/pci/proc.c | 60 +++++++++++++++++++++++++++++++++------
7 files changed, 93 insertions(+), 40 deletions(-)

Index: linux-2.6/arch/microblaze/pci/pci-common.c
===================================================================
--- linux-2.6.orig/arch/microblaze/pci/pci-common.c
+++ linux-2.6/arch/microblaze/pci/pci-common.c
@@ -282,12 +282,15 @@ int pci_mmap_page_range(struct pci_dev *
{
resource_size_t offset =
((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
- struct resource *rp;
int ret;

- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
+ if (mmap_state == pci_mmap_io) {
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ /* hose should never be NULL */
+ offset += hose->io_base_phys -
+ ((unsigned long)hose->io_base_virt - _IO_BASE);
+ }

vma->vm_pgoff = offset >> PAGE_SHIFT;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
Index: linux-2.6/arch/powerpc/kernel/pci-common.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/pci-common.c
+++ linux-2.6/arch/powerpc/kernel/pci-common.c
@@ -420,12 +420,15 @@ int pci_mmap_page_range(struct pci_dev *
{
resource_size_t offset =
((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
- struct resource *rp;
int ret;

- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
+ if (mmap_state == pci_mmap_io) {
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+ /* hose should never be NULL */
+ offset += hose->io_base_phys -
+ ((unsigned long)hose->io_base_virt - _IO_BASE);
+ }

vma->vm_pgoff = offset >> PAGE_SHIFT;
if (write_combine)
Index: linux-2.6/arch/sparc/kernel/pci.c
===================================================================
--- linux-2.6.orig/arch/sparc/kernel/pci.c
+++ linux-2.6/arch/sparc/kernel/pci.c
@@ -868,10 +868,6 @@ int pci_mmap_page_range(struct pci_dev *
{
int ret;

- ret = __pci_mmap_make_offset(dev, vma, mmap_state);
- if (ret < 0)
- return ret;
-
__pci_mmap_set_pgprot(dev, vma, mmap_state);

vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
Index: linux-2.6/arch/xtensa/kernel/pci.c
===================================================================
--- linux-2.6.orig/arch/xtensa/kernel/pci.c
+++ linux-2.6/arch/xtensa/kernel/pci.c
@@ -366,11 +366,18 @@ int pci_mmap_page_range(struct pci_dev *
enum pci_mmap_state mmap_state,
int write_combine)
{
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
int ret;

- ret = __pci_mmap_make_offset(dev, vma, mmap_state);
- if (ret < 0)
- return ret;
+ if (mmap_state == pci_mmap_io) {
+ struct pci_controller *pci_ctrl =
+ (struct pci_controller *)dev->sysdata;
+
+ /* pci_ctrl should never be NULL */
+ offset += pci_ctrl->io_space.start - pci_ctrl->io_space.base;
+ }
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;

__pci_mmap_set_pgprot(dev, vma, mmap_state, write_combine);

Index: linux-2.6/drivers/pci/pci-sysfs.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci-sysfs.c
+++ linux-2.6/drivers/pci/pci-sysfs.c
@@ -972,22 +972,28 @@ void pci_remove_legacy_files(struct pci_
#ifdef HAVE_PCI_MMAP

int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_type,
enum pci_mmap_api mmap_api)
{
unsigned long nr, start, size, pci_start = 0;
+ int flags;

if (pci_resource_len(pdev, resno) == 0)
return 0;
+
+ if (mmap_type == pci_mmap_mem)
+ flags = IORESOURCE_MEM;
+ else
+ flags = IORESOURCE_IO;
+
+ if (!(pci_resource_flags(pdev, resno) & flags))
+ return 0;
+
nr = vma_pages(vma);
start = vma->vm_pgoff;
size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
- if (mmap_api == PCI_MMAP_PROCFS) {
- resource_size_t user_start, user_end;
-
- pci_resource_to_user(pdev, resno, &pdev->resource[resno],
- &user_start, &user_end);
- pci_start = user_start >> PAGE_SHIFT;
- }
+ pci_start = (mmap_api == PCI_MMAP_PROCFS) ?
+ pci_resource_start(pdev, resno) >> PAGE_SHIFT : 0;
if (start >= pci_start && start < pci_start + size &&
start + nr <= pci_start + size)
return 1;
@@ -1009,7 +1015,6 @@ static int pci_mmap_resource(struct kobj
struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
struct resource *res = attr->private;
enum pci_mmap_state mmap_type;
- resource_size_t start, end;
int i;

for (i = 0; i < PCI_ROM_RESOURCE; i++)
@@ -1021,7 +1026,8 @@ static int pci_mmap_resource(struct kobj
if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
return -EINVAL;

- if (!pci_mmap_fits(pdev, i, vma, PCI_MMAP_SYSFS)) {
+ mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
+ if (!pci_mmap_fits(pdev, i, vma, mmap_type, PCI_MMAP_SYSFS)) {
WARN(1, "process \"%s\" tried to map 0x%08lx bytes at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff,
pci_name(pdev), i,
@@ -1030,13 +1036,7 @@ static int pci_mmap_resource(struct kobj
return -EINVAL;
}

- /* pci_mmap_page_range() expects the same kind of entry as coming
- * from /proc/bus/pci/ which is a "user visible" value. If this is
- * different from the resource itself, arch will do necessary fixup.
- */
- pci_resource_to_user(pdev, i, res, &start, &end);
- vma->vm_pgoff += start >> PAGE_SHIFT;
- mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
+ vma->vm_pgoff += res->start >> PAGE_SHIFT;
return pci_mmap_page_range(pdev, vma, mmap_type, write_combine);
}

Index: linux-2.6/drivers/pci/proc.c
===================================================================
--- linux-2.6.orig/drivers/pci/proc.c
+++ linux-2.6/drivers/pci/proc.c
@@ -227,24 +227,68 @@ static long proc_bus_pci_ioctl(struct fi
}

#ifdef HAVE_PCI_MMAP
+
+static int pci_user_to_resource(struct pci_dev *dev, resource_size_t *offset,
+ int flags)
+{
+ int i;
+
+ for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ resource_size_t start, end;
+ struct resource *res = &dev->resource[i];
+
+ if (!(res->flags & flags))
+ continue;
+
+ if (pci_resource_len(dev, i) == 0)
+ continue;
+
+ /*
+ * here *offset is PAGE_SIZE aligned from caller.
+ * need align start/end for io port resource that is
+ * usually not PAGE_SIZE aligned.
+ * that means we let it go if they falls in same page.
+ */
+ pci_resource_to_user(dev, i, res, &start, &end);
+ if ((start & PAGE_MASK) <= *offset &&
+ *offset <= (end & PAGE_MASK)) {
+ *offset = res->start + (*offset - start);
+ return i;
+ }
+ }
+
+ return -ENODEV;
+}
+
static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
{
struct pci_dev *dev = PDE_DATA(file_inode(file));
struct pci_filp_private *fpriv = file->private_data;
- int i, ret, write_combine;
+ resource_size_t offset;
+ int i, ret, flags, write_combine;

if (!capable(CAP_SYS_RAWIO))
return -EPERM;

- /* Make sure the caller is mapping a real resource for this device */
- for (i = 0; i < PCI_ROM_RESOURCE; i++) {
- if (pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS))
- break;
- }
-
- if (i >= PCI_ROM_RESOURCE)
+ offset = vma->vm_pgoff << PAGE_SHIFT;
+ if (fpriv->mmap_state == pci_mmap_mem)
+ flags = IORESOURCE_MEM;
+ else
+ flags = IORESOURCE_IO;
+ i = pci_user_to_resource(dev, &offset, flags);
+ if (i < 0)
return -ENODEV;

+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ if (!pci_mmap_fits(dev, i, vma, fpriv->mmap_state, PCI_MMAP_PROCFS)) {
+ WARN(1, "process \"%s\" tried to map 0x%08lx bytes at page 0x%08lx on %s BAR %d (start 0x%16Lx, size 0x%16Lx)\n",
+ current->comm, vma->vm_end-vma->vm_start, vma->vm_pgoff,
+ pci_name(dev), i,
+ (u64)pci_resource_start(dev, i),
+ (u64)pci_resource_len(dev, i));
+ return -EINVAL;
+ }
+
if (fpriv->mmap_state == pci_mmap_mem)
write_combine = fpriv->write_combine;
else
Index: linux-2.6/drivers/pci/pci.h
===================================================================
--- linux-2.6.orig/drivers/pci/pci.h
+++ linux-2.6/drivers/pci/pci.h
@@ -30,7 +30,7 @@ enum pci_mmap_api {
PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/<BDF> */
};
int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai,
- enum pci_mmap_api mmap_api);
+ enum pci_mmap_state mmap_type, enum pci_mmap_api mmap_api);
#endif
int pci_probe_reset_function(struct pci_dev *dev);