Re: [PATCH v4 1/3] PCI: altera: Add Stratix 10 PCIe support

From: Ley Foon Tan
Date: Mon Feb 25 2019 - 04:35:16 EST


On Tue, 2019-02-19 at 16:23 +0000, Lorenzo Pieralisi wrote:
> On Thu, Feb 14, 2019 at 11:20:36PM +0800, Ley Foon Tan wrote:
> >
> > Add PCIe Root Port support for Stratix 10 device.
> >
> > Main differences:
> Main differences with what ? We need to rewrite this commit log.
Differences compared with the Cyclone V and Arria 10 devices.
I will rewrite this.
>
> >
> > - HIP interface to access Root Port configuration register.
> > - TLP programming flow:
> >   - One REG0 register
> >   - Don't need to check alignment
> >
> > Signed-off-by: Ley Foon Tan <ley.foon.tan@xxxxxxxxx>
> > ---
> > Âdrivers/pci/controller/pcie-altera.c |ÂÂ246
> > ++++++++++++++++++++++++++++++----
> > Â1 files changed, 222 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/pci/controller/pcie-altera.c
> > b/drivers/pci/controller/pcie-altera.c
> > index 7d05e51..76bb6a6 100644
> > --- a/drivers/pci/controller/pcie-altera.c
> > +++ b/drivers/pci/controller/pcie-altera.c
> > @@ -11,6 +11,7 @@
> > Â#include <linux/irqchip/chained_irq.h>
> > Â#include <linux/init.h>
> > Â#include <linux/of_address.h>
> > +#include <linux/of_device.h>
> > Â#include <linux/of_irq.h>
> > Â#include <linux/of_pci.h>
> > Â#include <linux/pci.h>
> > @@ -37,7 +38,12 @@
> > Â#define RP_LTSSM_MASK 0x1f
> > Â#define LTSSM_L0 0xf
> > Â
> > -#define PCIE_CAP_OFFSET 0x80
> > +#define S10_RP_TX_CNTRL 0x2004
> > +#define S10_RP_RXCPL_REG 0x2008
> > +#define S10_RP_RXCPL_STATUS 0x200C
> > +#define S10_RP_CFG_ADDR(pcie, reg) \
> > + (((pcie)->hip_base) + (reg) + (1 << 20))
> > +
> > Â/* TLP configuration type 0 and 1 */
> > Â#define TLP_FMTTYPE_CFGRD0 0x04 /*
> > Configuration Read Type 0 */
> > Â#define TLP_FMTTYPE_CFGWR0 0x44 /*
> > Configuration Write Type 0 */
> > @@ -49,18 +55,19 @@
> > Â#define RP_DEVFN 0
> > Â#define TLP_REQ_ID(bus, devfn) (((bus) << 8) |
> > (devfn))
> > Â#define TLP_CFGRD_DW0(pcie, bus)
> > \
> > -ÂÂÂÂ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0
> > \
> > - ÂÂÂÂ: TLP_FMTTYPE_CFGRD1) << 24) |
> > \
> > -ÂÂÂÂÂTLP_PAYLOAD_SIZE)
> > + ((((bus == pcie->root_bus_nr) ? pcie->pcie_data->cfgrd0
> > \
> > + : pcie->pcie_data->cfgrd1) << 24)
> > | \
> > + TLP_PAYLOAD_SIZE)
> > Â#define TLP_CFGWR_DW0(pcie, bus)
> > \
> > -ÂÂÂÂ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGWR0
> > \
> > - ÂÂÂÂ: TLP_FMTTYPE_CFGWR1) << 24) |
> > \
> > -ÂÂÂÂÂTLP_PAYLOAD_SIZE)
> > + ((((bus == pcie->root_bus_nr) ? pcie->pcie_data->cfgwr0
> > \
> > + : pcie->pcie_data->cfgwr1) << 24)
> > | \
> > + TLP_PAYLOAD_SIZE)
> > Â#define TLP_CFG_DW1(pcie, tag, be) \
> > -ÂÂÂÂ(((TLP_REQ_ID(pcie->root_bus_nr,ÂÂRP_DEVFN)) << 16) | (tag <<
> > 8) | (be))
> > + (((TLP_REQ_ID(pcie->root_bus_nr,ÂÂRP_DEVFN)) << 16) | (tag
> > << 8) | (be))
> > Â#define TLP_CFG_DW2(bus, devfn, offset) \
> > Â (((bus) << 24) | ((devfn) << 16) |
> > (offset))
> > Â#define TLP_COMP_STATUS(s) (((s) >> 13) & 7)
> > +#define TLP_BYTE_COUNT(s) (((s) >> 0) & 0xfff)
> > Â#define TLP_HDR_SIZE 3
> > Â#define TLP_LOOP 500
> > Â
> > @@ -69,14 +76,43 @@
> > Â
> > Â#define DWORD_MASK 3
> > Â
> > +#define S10_TLP_FMTTYPE_CFGRD0 0x05
> > +#define S10_TLP_FMTTYPE_CFGRD1 0x04
> > +#define S10_TLP_FMTTYPE_CFGWR0 0x45
> > +#define S10_TLP_FMTTYPE_CFGWR1 0x44
> > +
> > +enum altera_pcie_version {
> > + ALTERA_PCIE_V1 = 0,
> > + ALTERA_PCIE_V2,
> > +};
> > +
> > Âstruct altera_pcie {
> > Â struct platform_device *pdev;
> > - void __iomem *cra_base; /* DT Cra */
> > + void __iomem *cra_base;
> > + void __iomem *hip_base;
> > Â int irq;
> > Â u8 root_bus_nr;
> > Â struct irq_domain *irq_domain;
> > Â struct resource bus_range;
> > Â struct list_head resources;
> > + const struct altera_pcie_data *pcie_data;
> > +};
> > +
> > +struct altera_pcie_data {
> > + int (*tlp_read_pkt)(struct altera_pcie *pcie, u32 *value);
> > + void (*tlp_write_pkt)(struct altera_pcie *pcie, u32
> > *headers,
> > + ÂÂÂÂÂÂu32 data, bool align);
> > + bool (*get_link_status)(struct altera_pcie *pcie);
> > + int (*rp_read_cfg)(struct altera_pcie *pcie, int where,
> > + ÂÂÂint size, u32 *value);
> > + int (*rp_write_cfg)(struct altera_pcie *pcie, u8 bus, int
> > where,
> > + ÂÂÂÂint size, u32 value);
> > + enum altera_pcie_version version;
> > + u32 cap_offset; /* PCIe capability
> > structure register offset */
> This is a duplication of struct pci_dev.pcie_cap right ?

This is the register offset in the PCIe Root Port IP used to access the
RP's PCIe capability structure registers directly. This allows us to
access the PCIe capability structure registers without a pci_dev.

>
> >
> > + u32 cfgrd0;
> > + u32 cfgrd1;
> > + u32 cfgwr0;
> > + u32 cfgwr1;
> > Â};
> > Â
> > Âstruct tlp_rp_regpair_t {
> > @@ -101,6 +137,15 @@ static bool altera_pcie_link_up(struct
> > altera_pcie *pcie)
> > Â return !!((cra_readl(pcie, RP_LTSSM) & RP_LTSSM_MASK) ==
> > LTSSM_L0);
> > Â}
> > Â
> > +static bool s10_altera_pcie_link_up(struct altera_pcie *pcie)
> > +{
> > + void __iomem *addr = S10_RP_CFG_ADDR(pcie,
> > + ÂÂÂpcie->pcie_data->cap_offset +
> > + ÂÂÂPCI_EXP_LNKSTA);
> > +
> > + return !!(readw(addr) & PCI_EXP_LNKSTA_DLLLA);
> > +}
> > +
> > Â/*
> > Â * Altera PCIe port uses BAR0 of RC's configuration space as the
> > translation
> > Â * from PCI bus to native BUS.ÂÂEntire DDR region is mapped into
> > PCIe space
> > @@ -128,12 +173,18 @@ static void tlp_write_tx(struct altera_pcie
> > *pcie,
> > Â cra_writel(pcie, tlp_rp_regdata->ctrl, RP_TX_CNTRL);
> > Â}
> > Â
> > +static void s10_tlp_write_tx(struct altera_pcie *pcie, u32 reg0,
> > u32 ctrl)
> > +{
> > + cra_writel(pcie, reg0, RP_TX_REG0);
> > + cra_writel(pcie, ctrl, S10_RP_TX_CNTRL);
> > +}
> > +
> > Âstatic bool altera_pcie_valid_device(struct altera_pcie *pcie,
> > Â ÂÂÂÂÂstruct pci_bus *bus, int dev)
> > Â{
> > Â /* If there is no link, then there is no device */
> > Â if (bus->number != pcie->root_bus_nr) {
> > - if (!altera_pcie_link_up(pcie))
> > + if (!pcie->pcie_data->get_link_status(pcie))
> > Â return false;
> > Â }
> > Â
> > @@ -183,6 +234,46 @@ static int tlp_read_packet(struct altera_pcie
> > *pcie, u32 *value)
> > Â return PCIBIOS_DEVICE_NOT_FOUND;
> > Â}
> > Â
> > +static int s10_tlp_read_packet(struct altera_pcie *pcie, u32
> > *value)
> > +{
> > + int i;
> > + u32 ctrl;
> > + u32 comp_status;
> > + u32 dw[4];
> > + u32 count = 0;
> > +
> > + for (i = 0; i < TLP_LOOP; i++) {
> > + ctrl = cra_readl(pcie, S10_RP_RXCPL_STATUS);
> > + if (!(ctrl & RP_RXCPL_SOP))
> > + continue;
> > +
> > + /* Read first DW */
> > + dw[count++] = cra_readl(pcie, S10_RP_RXCPL_REG);
> > +
> > + /* Poll for EOP */
> > + for (i = 0; i < TLP_LOOP; i++) {
> > + ctrl = cra_readl(pcie,
> > S10_RP_RXCPL_STATUS);
> > + dw[count++] = cra_readl(pcie,
> > S10_RP_RXCPL_REG);
> I think you'd better add a check on count lest it can overflow dw[].
Okay, will check that.
>
> >
> > + if (ctrl & RP_RXCPL_EOP) {
> > + comp_status =
> > TLP_COMP_STATUS(dw[1]);
> > + if (comp_status)
> > + return
> > PCIBIOS_DEVICE_NOT_FOUND;
> > +
> > + if (value &&
> > + ÂÂÂÂTLP_BYTE_COUNT(dw[1]) ==
> > sizeof(u32) &&
> > + ÂÂÂÂcount >= 3)
> > + *value = dw[3];
> > +
> > + return PCIBIOS_SUCCESSFUL;
> > + }
> > + }
> > +
> > + udelay(5);
> > + }
> > +
> > + return PCIBIOS_DEVICE_NOT_FOUND;
> > +}
> > +
> > Âstatic void tlp_write_packet(struct altera_pcie *pcie, u32
> > *headers,
> > Â ÂÂÂÂÂu32 data, bool align)
> > Â{
> > @@ -210,6 +301,15 @@ static void tlp_write_packet(struct
> > altera_pcie *pcie, u32 *headers,
> > Â tlp_write_tx(pcie, &tlp_rp_regdata);
> > Â}
> > Â
> > +static void s10_tlp_write_packet(struct altera_pcie *pcie, u32
> > *headers,
> > + Âu32 data, bool dummy)
> > +{
> > + s10_tlp_write_tx(pcie, headers[0], RP_TX_SOP);
> > + s10_tlp_write_tx(pcie, headers[1], 0);
> > + s10_tlp_write_tx(pcie, headers[2], 0);
> > + s10_tlp_write_tx(pcie, data, RP_TX_EOP);
> > +}
> > +
> > Âstatic int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus,
> > u32 devfn,
> > Â ÂÂÂÂÂÂint where, u8 byte_en, u32 *value)
> > Â{
> > @@ -219,9 +319,9 @@ static int tlp_cfg_dword_read(struct
> > altera_pcie *pcie, u8 bus, u32 devfn,
> > Â headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en);
> > Â headers[2] = TLP_CFG_DW2(bus, devfn, where);
> > Â
> > - tlp_write_packet(pcie, headers, 0, false);
> > + pcie->pcie_data->tlp_write_pkt(pcie, headers, 0, false);
> > Â
> > - return tlp_read_packet(pcie, value);
> > + return pcie->pcie_data->tlp_read_pkt(pcie, value);
> > Â}
> > Â
> > Âstatic int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus,
> > u32 devfn,
> > @@ -236,11 +336,11 @@ static int tlp_cfg_dword_write(struct
> > altera_pcie *pcie, u8 bus, u32 devfn,
> > Â
> > Â /* check alignment to Qword */
> > Â if ((where & 0x7) == 0)
> > - tlp_write_packet(pcie, headers, value, true);
> > + pcie->pcie_data->tlp_write_pkt(pcie, headers,
> > value, true);
> > Â else
> > - tlp_write_packet(pcie, headers, value, false);
> > + pcie->pcie_data->tlp_write_pkt(pcie, headers,
> > value, false);
> > Â
> > - ret = tlp_read_packet(pcie, NULL);
> > + ret = pcie->pcie_data->tlp_read_pkt(pcie, NULL);
> > Â if (ret != PCIBIOS_SUCCESSFUL)
> > Â return ret;
> > Â
> > @@ -254,6 +354,53 @@ static int tlp_cfg_dword_write(struct
> > altera_pcie *pcie, u8 bus, u32 devfn,
> > Â return PCIBIOS_SUCCESSFUL;
> > Â}
> > Â
> > +static int s10_rp_read_cfg(struct altera_pcie *pcie, int where,
> > + ÂÂÂint size, u32 *value)
> > +{
> > + void *addr = S10_RP_CFG_ADDR(pcie, where);
> > +
> > + switch (size) {
> > + case 1:
> > + *value = readb(addr);
> > + break;
> > + case 2:
> > + *value = readw(addr);
> > + break;
> > + default:
> > + *value = readl(addr);
> > + break;
> > + }
> This boilerplate could be avoided if you used generic config
> accessors (ie pci_generic_config_read()/write()).
Okay. I will change this.
>
> >
> > +
> > + return PCIBIOS_SUCCESSFUL;
> > +}
> > +
> > +static int s10_rp_write_cfg(struct altera_pcie *pcie, u8 bus, int
> > where,
> > + ÂÂÂÂint size, u32 value)
> > +{
> > + void *addr = S10_RP_CFG_ADDR(pcie, where);
> > +
> > + switch (size) {
> > + case 1:
> > + writeb(value, addr);
> > + break;
> > + case 2:
> > + writew(value, addr);
> > + break;
> > + default:
> > + writel(value, addr);
> > + break;
> > + }
> Ditto.
Noted.
>
> >
> > +
> > + /*
> > + Â* Monitor changes to PCI_PRIMARY_BUS register on root
> > port
> > + Â* and update local copy of root bus number accordingly.
> > + Â*/
> > + if (bus == pcie->root_bus_nr && where == PCI_PRIMARY_BUS)
> > + pcie->root_bus_nr = value & 0xff;
> > +
> > + return PCIBIOS_SUCCESSFUL;
> > +}
> > +
> > Âstatic int _altera_pcie_cfg_read(struct altera_pcie *pcie, u8
> > busno,
> > Â Âunsigned int devfn, int where,
> > int size,
> > Â Âu32 *value)
> > @@ -262,6 +409,9 @@ static int _altera_pcie_cfg_read(struct
> > altera_pcie *pcie, u8 busno,
> > Â u32 data;
> > Â u8 byte_en;
> > Â
> > + if (busno == pcie->root_bus_nr && pcie->pcie_data-
> > >rp_read_cfg)
> > + return pcie->pcie_data->rp_read_cfg(pcie, where,
> > size, value);
> > +
> > Â switch (size) {
> > Â case 1:
> > Â byte_en = 1 << (where & 3);
> > @@ -302,6 +452,10 @@ static int _altera_pcie_cfg_write(struct
> > altera_pcie *pcie, u8 busno,
> > Â u32 shift = 8 * (where & 3);
> > Â u8 byte_en;
> > Â
> > + if (busno == pcie->root_bus_nr && pcie->pcie_data-
> > >rp_write_cfg)
> > + return pcie->pcie_data->rp_write_cfg(pcie, busno,
> > where,
> > + size,
> > value);
> > +
> > Â switch (size) {
> > Â case 1:
> > Â data32 = (value & 0xff) << shift;
> > @@ -365,7 +519,8 @@ static int altera_read_cap_word(struct
> > altera_pcie *pcie, u8 busno,
> > Â int ret;
> > Â
> > Â ret = _altera_pcie_cfg_read(pcie, busno, devfn,
> > - ÂÂÂÂPCIE_CAP_OFFSET + offset,
> > sizeof(*value),
> > + ÂÂÂÂpcie->pcie_data->cap_offset +
> > offset,
> > + ÂÂÂÂsizeof(*value),
> > Â ÂÂÂÂ&data);
> > Â *value = data;
> > Â return ret;
> > @@ -375,7 +530,8 @@ static int altera_write_cap_word(struct
> > altera_pcie *pcie, u8 busno,
> > Â Âunsigned int devfn, int offset,
> > u16 value)
> > Â{
> > Â return _altera_pcie_cfg_write(pcie, busno, devfn,
> > - ÂÂÂÂÂÂPCIE_CAP_OFFSET + offset,
> > sizeof(value),
> > + ÂÂÂÂÂÂpcie->pcie_data->cap_offset
> > + offset,
> > + ÂÂÂÂÂÂsizeof(value),
> > Â ÂÂÂÂÂÂvalue);
> > Â}
> > Â
> > @@ -403,7 +559,7 @@ static void altera_wait_link_retrain(struct
> > altera_pcie *pcie)
> > Â /* Wait for link is up */
> > Â start_jiffies = jiffies;
> > Â for (;;) {
> > - if (altera_pcie_link_up(pcie))
> > + if (pcie->pcie_data->get_link_status(pcie))
> > Â break;
> > Â
> > Â if (time_after(jiffies, start_jiffies +
> > LINK_UP_TIMEOUT)) {
> > @@ -418,7 +574,7 @@ static void altera_pcie_retrain(struct
> > altera_pcie *pcie)
> > Â{
> > Â u16 linkcap, linkstat, linkctl;
> > Â
> > - if (!altera_pcie_link_up(pcie))
> > + if (!pcie->pcie_data->get_link_status(pcie))
> > Â return;
> > Â
> > Â /*
> > @@ -540,12 +696,20 @@ static int altera_pcie_parse_dt(struct
> > altera_pcie *pcie)
> > Â struct device *dev = &pcie->pdev->dev;
> > Â struct platform_device *pdev = pcie->pdev;
> > Â struct resource *cra;
> > + struct resource *hip;
> > Â
> > Â cra = platform_get_resource_byname(pdev, IORESOURCE_MEM,
> > "Cra");
> > Â pcie->cra_base = devm_ioremap_resource(dev, cra);
> > Â if (IS_ERR(pcie->cra_base))
> > Â return PTR_ERR(pcie->cra_base);
> > Â
> > + if (pcie->pcie_data->version == ALTERA_PCIE_V2) {
> > + hip = platform_get_resource_byname(pdev,
> > IORESOURCE_MEM, "Hip");
> > + pcie->hip_base = devm_ioremap_resource(&pdev->dev,
> > hip);
> > + if (IS_ERR(pcie->hip_base))
> > + return PTR_ERR(pcie->hip_base);
> > + }
> > +
> > Â /* setup IRQ */
> > Â pcie->irq = platform_get_irq(pdev, 0);
> > Â if (pcie->irq < 0) {
> > @@ -562,6 +726,38 @@ static void altera_pcie_host_init(struct
> > altera_pcie *pcie)
> > Â altera_pcie_retrain(pcie);
> > Â}
> > Â
> > +static struct altera_pcie_data pci_1_0_data = {
> > + .tlp_read_pkt = tlp_read_packet,
> > + .tlp_write_pkt = tlp_write_packet,
> > + .get_link_status = altera_pcie_link_up,
> > + .cap_offset = 0x80,
> > + .version = ALTERA_PCIE_V1,
> > + .cfgrd0 = TLP_FMTTYPE_CFGRD0,
> > + .cfgrd1 = TLP_FMTTYPE_CFGRD1,
> > + .cfgwr0 = TLP_FMTTYPE_CFGWR0,
> > + .cfgwr1 = TLP_FMTTYPE_CFGWR1,
> > +};
> > +
> > +static struct altera_pcie_data pci_2_0_data = {
> > + .tlp_read_pkt = s10_tlp_read_packet,
> > + .tlp_write_pkt = s10_tlp_write_packet,
> > + .get_link_status = s10_altera_pcie_link_up,
> > + .rp_read_cfg = s10_rp_read_cfg,
> > + .rp_write_cfg = s10_rp_write_cfg,
> > + .version = ALTERA_PCIE_V2,
> > + .cap_offset = 0x70,
> > + .cfgrd0 = S10_TLP_FMTTYPE_CFGRD0,
> > + .cfgrd1 = S10_TLP_FMTTYPE_CFGRD1,
> > + .cfgwr0 = S10_TLP_FMTTYPE_CFGWR0,
> > + .cfgwr1 = S10_TLP_FMTTYPE_CFGWR1,
> > +};
> I wonder if it is not better to have two different struct pci_ops
> (and
> have a pointer to them in struct altera_pcie_data) instead of all
> these
> parameters (most of which are there to differentiate struct pci_ops
> methods).
>
> See also my comments above.
>
> In the end I guess it depends on how many host bridges will be based
> on
> this IP as design, what you did is OK but it is worth thinking about
> it.
>
We have two versions of HW based on this IP for now, and we may have new
versions in the future.
I can move all the callback functions into struct pci_ops and keep a
pointer to it in struct altera_pcie_data.

Thanks.


Regards
Ley Foon