Re: [PATCH] pci-sysfs: Make PCI bridge attribute visible in sysfs

From: Vee Khee Wong
Date: Thu May 04 2017 - 03:55:11 EST




On Mon, 2017-04-17 at 10:41 -0500, Bjorn Helgaas wrote:
> On Mon, Apr 17, 2017 at 12:50 AM, Wong Vee Khee <vee.khee.wong@xxxxxx
> > wrote:
> >
> > From: vwong <vee.khee.wong@xxxxxx>
> >
> > Export the PCIe link attributes of PCI bridges to sysfs.
> This needs justification for *why* we should export these via sysfs.
>
> Some of these things, e.g., secondary/subordinate bus numbers, are
> already visible to non-privileged users via "lspci -v".
>
We need to expose these attributes via sysfs due to several reasons
listed below:

1) PCIe capabilities info such as Maximum/Actual link width and link speed only visible to privileged users via "lspci -vvv". The software that my team is working on need to get PCIe link information as non-root user.

2) From a client perspective, it require a lot of overhead parsing output of lspci to get PCIe capabilities. Our application is only interested in getting PCIe bridges but lspci print info for all PCIe devices.

> >
> > Signed-off-by: Wong Vee Khee <vee.khee.wong@xxxxxx>
> > Signed-off-by: Hui Chun Ong <hui.chun.ong@xxxxxx>
> > ---
> > Âdrivers/pci/pci-sysfs.cÂÂÂÂÂÂÂ| 197
> > +++++++++++++++++++++++++++++++++++++++++-
> > Âinclude/uapi/linux/pci_regs.h |ÂÂÂ4 +
> > Â2 files changed, 197 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> > index 25d010d..a218c43 100644
> > --- a/drivers/pci/pci-sysfs.c
> > +++ b/drivers/pci/pci-sysfs.c
> > @@ -154,6 +154,127 @@ static ssize_t resource_show(struct device
> > *dev, struct device_attribute *attr,
> > Â}
> > Âstatic DEVICE_ATTR_RO(resource);
> >
> > +static ssize_t max_link_speed_show(struct device *dev,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct device_attribute *attr,
> > char *buf)
> > +{
> > +ÂÂÂÂÂÂÂstruct pci_dev *pci_dev = to_pci_dev(dev);
> > +ÂÂÂÂÂÂÂu32 linkcap;
> > +ÂÂÂÂÂÂÂint err;
> > +ÂÂÂÂÂÂÂconst char *speed;
> > +
> > +ÂÂÂÂÂÂÂerr = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP,
> > &linkcap);
> > +
> > +ÂÂÂÂÂÂÂif (!err && linkcap) {
> Â if (err)
> ÂÂÂÂreturn -EINVAL;
>
> I don't think there's a reason to test "linkcap" here.ÂÂPer spec,
> zero
> is not a valid value of LNKCAP, so if we got a zero, I think showing
> "Unknown speed" would be fine.
>
> >
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂswitch (linkcap & PCI_EXP_LNKCAP_MLS) {
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂcase PCI_EXP_LNKCAP_MLS_8_0GB:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "8 GT/s";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbreak;
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂcase PCI_EXP_LNKCAP_MLS_5_0GB:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "5 GT/s";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbreak;
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂcase PCI_EXP_LNKCAP_MLS_2_5GB:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "2.5 GT/s";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbreak;
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂdefault:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "Unknown speed";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ}
> > +
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂreturn sprintf(buf, "%s\n", speed);
> > +ÂÂÂÂÂÂÂ}
> > +
> > +ÂÂÂÂÂÂÂreturn -EINVAL;
> > +}
> > +static DEVICE_ATTR_RO(max_link_speed);
> > +
> > +static ssize_t max_link_width_show(struct device *dev,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct device_attribute *attr,
> > char *buf)
> > +{
> > +ÂÂÂÂÂÂÂstruct pci_dev *pci_dev = to_pci_dev(dev);
> > +ÂÂÂÂÂÂÂu32 linkcap;
> > +ÂÂÂÂÂÂÂint err;
> > +
> > +ÂÂÂÂÂÂÂerr = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP,
> > &linkcap);
> > +
> > +ÂÂÂÂÂÂÂreturn err ? -EINVAL : sprintf(
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbuf, "%u\n", (linkcap & PCI_EXP_LNKCAP_MLW) >> 4);
> Â if (err)
> ÂÂÂÂreturn -EINVAL;
>
> Â return sprintf(...);
>
> >
> > +}
> > +static DEVICE_ATTR_RO(max_link_width);
> > +
> > +static ssize_t current_link_speed_show(struct device *dev,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct device_attribute
> > *attr, char *buf)
> > +{
> > +ÂÂÂÂÂÂÂstruct pci_dev *pci_dev = to_pci_dev(dev);
> > +ÂÂÂÂÂÂÂu16 linkstat;
> > +ÂÂÂÂÂÂÂint err;
> > +ÂÂÂÂÂÂÂconst char *speed;
> > +
> > +ÂÂÂÂÂÂÂerr = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA,
> > &linkstat);
> > +
> > +ÂÂÂÂÂÂÂif (!err && linkstat) {
> See max_link_speed above.
>
> >
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂswitch (linkstat & PCI_EXP_LNKSTA_CLS) {
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂcase PCI_EXP_LNKSTA_CLS_8_0GB:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "8 GT/s";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbreak;
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂcase PCI_EXP_LNKSTA_CLS_5_0GB:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "5 GT/s";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbreak;
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂcase PCI_EXP_LNKSTA_CLS_2_5GB:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "2.5 GT/s";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbreak;
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂdefault:
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂspeed = "Unknown speed";
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ}
> > +
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂreturn sprintf(buf, "%s\n", speed);
> > +ÂÂÂÂÂÂÂ}
> > +
> > +ÂÂÂÂÂÂÂreturn -EINVAL;
> > +}
> > +static DEVICE_ATTR_RO(current_link_speed);
> > +
> > +static ssize_t current_link_width_show(struct device *dev,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct device_attribute
> > *attr, char *buf)
> > +{
> > +ÂÂÂÂÂÂÂstruct pci_dev *pci_dev = to_pci_dev(dev);
> > +ÂÂÂÂÂÂÂu16 linkstat;
> > +ÂÂÂÂÂÂÂint err;
> > +
> > +ÂÂÂÂÂÂÂerr = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA,
> > &linkstat);
> > +
> > +ÂÂÂÂÂÂÂreturn err ? -EINVAL : sprintf(
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂbuf, "%u\n",
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ(linkstat & PCI_EXP_LNKSTA_NLW) >>
> > PCI_EXP_LNKSTA_NLW_SHIFT);
> > +}
> > +static DEVICE_ATTR_RO(current_link_width);
> > +
> > +static ssize_t secondary_bus_number_show(struct device *dev,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct device_attribute
> > *attr,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂchar *buf)
> > +{
> > +ÂÂÂÂÂÂÂstruct pci_dev *pci_dev = to_pci_dev(dev);
> > +ÂÂÂÂÂÂÂu8 sec_bus;
> > +ÂÂÂÂÂÂÂint err;
> > +
> > +ÂÂÂÂÂÂÂerr = pci_read_config_byte(pci_dev, PCI_SECONDARY_BUS,
> > &sec_bus);
> There is already a /sys/devices/pciDDDD:BB/DDDD:BB:dd.f/<secondary>/
> directory and a .../pci_bus/ directory that looks like it is the
> secondary bus.ÂÂIs that enough?
>
> If we also need this file, I would like it to do something sensible
> when there is no secondary bus.ÂÂMaybe that is exposing the bus
> numbers directly, or maybe it is something else.ÂÂI tend to think
> that
> if you just want the register contents, lspci is enough, and if you
> need something in sysfs, it should be a little more digested, e.g.,
> the existing subdirectory.
>
> It'd be helpful to know something about how you want to use this.
>
> >
> > +ÂÂÂÂÂÂÂreturn err ? -EINVAL : sprintf(buf, "%u\n", sec_bus);
> > +}
> > +static DEVICE_ATTR_RO(secondary_bus_number);
> > +
> > +static ssize_t subordinate_bus_number_show(struct device *dev,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct device_attribute
> > *attr,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂchar *buf)
> > +{
> > +ÂÂÂÂÂÂÂstruct pci_dev *pci_dev = to_pci_dev(dev);
> > +ÂÂÂÂÂÂÂu8 sub_bus;
> > +ÂÂÂÂÂÂÂint err;
> > +
> > +ÂÂÂÂÂÂÂerr = pci_read_config_byte(pci_dev, PCI_SUBORDINATE_BUS,
> > &sub_bus);
> > +
> > +ÂÂÂÂÂÂÂreturn err ? -EINVAL : sprintf(buf, "%u\n", sub_bus);
> > +}
> > +static DEVICE_ATTR_RO(subordinate_bus_number);
> > +
> > Âstatic ssize_t modalias_show(struct device *dev, struct
> > device_attribute *attr,
> > ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂchar *buf)
> > Â{
> > @@ -602,12 +723,17 @@ static struct attribute *pci_dev_attrs[] = {
> > ÂÂÂÂÂÂÂÂNULL,
> > Â};
> >
> > -static const struct attribute_group pci_dev_group = {
> > -ÂÂÂÂÂÂÂ.attrs = pci_dev_attrs,
> > +static struct attribute *pci_bridge_attrs[] = {
> > +ÂÂÂÂÂÂÂ&dev_attr_subordinate_bus_number.attr,
> > +ÂÂÂÂÂÂÂ&dev_attr_secondary_bus_number.attr,
> > +ÂÂÂÂÂÂÂNULL,
> > Â};
> >
> > -const struct attribute_group *pci_dev_groups[] = {
> > -ÂÂÂÂÂÂÂ&pci_dev_group,
> > +static struct attribute *pcie_dev_attrs[] = {
> > +ÂÂÂÂÂÂÂ&dev_attr_current_link_speed.attr,
> > +ÂÂÂÂÂÂÂ&dev_attr_current_link_width.attr,
> > +ÂÂÂÂÂÂÂ&dev_attr_max_link_width.attr,
> > +ÂÂÂÂÂÂÂ&dev_attr_max_link_speed.attr,
> > ÂÂÂÂÂÂÂÂNULL,
> > Â};
> >
> > @@ -1540,6 +1666,57 @@ static umode_t
> > pci_dev_hp_attrs_are_visible(struct kobject *kobj,
> > ÂÂÂÂÂÂÂÂreturn a->mode;
> > Â}
> >
> > +static umode_t pci_bridge_attrs_are_visible(struct kobject *kobj,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct attribute *a,
> > int n)
> > +{
> > +ÂÂÂÂÂÂÂstruct device *dev = kobj_to_dev(kobj);
> > +ÂÂÂÂÂÂÂstruct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +ÂÂÂÂÂÂÂif (!pci_is_bridge(pdev))
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂreturn 0;
> Â if (pci_is_bridge(pdev))
> ÂÂÂÂreturn a->mode;
>
> Â return 0;
>
> I think it's easier to read without the negation.ÂÂPossibly that's
> just my personal preference :)
>
> >
> > +
> > +ÂÂÂÂÂÂÂreturn a->mode;
> > +}
> > +
> > +static umode_t pcie_dev_attrs_are_visible(struct kobject *kobj,
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂstruct attribute *a, int
> > n)
> > +{
> > +ÂÂÂÂÂÂÂstruct device *dev = kobj_to_dev(kobj);
> > +ÂÂÂÂÂÂÂstruct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +ÂÂÂÂÂÂÂif (pci_find_capability(pdev, PCI_CAP_ID_EXP) == 0)
> Use pci_is_pcie().
>
> >
> > +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂreturn 0;
> > +
> > +ÂÂÂÂÂÂÂreturn a->mode;
> > +}
> > +
> > +static const struct attribute_group pci_dev_group = {
> > +ÂÂÂÂÂÂÂ.attrs = pci_dev_attrs,
> > +};
> > +
> > +const struct attribute_group *pci_dev_groups[] = {
> > +ÂÂÂÂÂÂÂ&pci_dev_group,
> > +ÂÂÂÂÂÂÂNULL,
> > +};
> > +
> > +static const struct attribute_group pci_bridge_group = {
> > +ÂÂÂÂÂÂÂ.attrs = pci_bridge_attrs,
> > +};
> > +
> > +const struct attribute_group *pci_bridge_groups[] = {
> > +ÂÂÂÂÂÂÂ&pci_bridge_group,
> > +ÂÂÂÂÂÂÂNULL,
> > +};
> > +
> > +static const struct attribute_group pcie_dev_group = {
> > +ÂÂÂÂÂÂÂ.attrs = pcie_dev_attrs,
> > +};
> > +
> > +const struct attribute_group *pcie_dev_groups[] = {
> > +ÂÂÂÂÂÂÂ&pcie_dev_group,
> > +ÂÂÂÂÂÂÂNULL,
> > +};
> > +
> > Âstatic struct attribute_group pci_dev_hp_attr_group = {
> > ÂÂÂÂÂÂÂÂ.attrs = pci_dev_hp_attrs,
> > ÂÂÂÂÂÂÂÂ.is_visible = pci_dev_hp_attrs_are_visible,
> > @@ -1574,12 +1751,24 @@ static struct attribute_group
> > pci_dev_attr_group = {
> > ÂÂÂÂÂÂÂÂ.is_visible = pci_dev_attrs_are_visible,
> > Â};
> >
> > +static struct attribute_group pci_bridge_attr_group = {
> > +ÂÂÂÂÂÂÂ.attrs = pci_bridge_attrs,
> > +ÂÂÂÂÂÂÂ.is_visible = pci_bridge_attrs_are_visible,
> > +};
> > +
> > +static struct attribute_group pcie_dev_attr_group = {
> > +ÂÂÂÂÂÂÂ.attrs = pcie_dev_attrs,
> > +ÂÂÂÂÂÂÂ.is_visible = pcie_dev_attrs_are_visible,
> > +};
> > +
> > Âstatic const struct attribute_group *pci_dev_attr_groups[] = {
> > ÂÂÂÂÂÂÂÂ&pci_dev_attr_group,
> > ÂÂÂÂÂÂÂÂ&pci_dev_hp_attr_group,
> > Â#ifdef CONFIG_PCI_IOV
> > ÂÂÂÂÂÂÂÂ&sriov_dev_attr_group,
> > Â#endif
> > +ÂÂÂÂÂÂÂ&pci_bridge_attr_group,
> > +ÂÂÂÂÂÂÂ&pcie_dev_attr_group,
> > ÂÂÂÂÂÂÂÂNULL,
> > Â};
> >
> > diff --git a/include/uapi/linux/pci_regs.h
> > b/include/uapi/linux/pci_regs.h
> > index 634c9c4..b1770dc 100644
> > --- a/include/uapi/linux/pci_regs.h
> > +++ b/include/uapi/linux/pci_regs.h
> > @@ -517,6 +517,10 @@
> > Â#defineÂÂPCI_EXP_LNKCAP_SLSÂÂÂÂ0x0000000f /* Supported Link Speeds
> > */
> > Â#defineÂÂPCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector
> > bit 0 */
> > Â#defineÂÂPCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector
> > bit 1 */
> > +#defineÂÂPCI_EXP_LNKCAP_MLSÂÂÂÂ0x0000000f /* Maximum Link Speeds
> > */
> > +#defineÂÂPCI_EXP_LNKCAP_MLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector
> > bit 0 */
> > +#defineÂÂPCI_EXP_LNKCAP_MLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector
> > bit 1 */
> > +#defineÂÂPCI_EXP_LNKCAP_MLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector
> > bit 2 */
> Rather than adding these _MLS_ symbols as duplicates of _SLS_, please
> just add one SLS_8_0GB symbol.
>
> The _SLS_ names are an artifact of the fact that prior to PCIe r3.0,
> this field was the "Supported Link Speeds" field.ÂÂPCIe 3.0 renamed
> it
> to "Max Link Speed" and added the "Supported Link Speeds Vector" in
> the new Link Capabilities 2 register.
>
> >
> > Â#defineÂÂPCI_EXP_LNKCAP_MLWÂÂÂÂ0x000003f0 /* Maximum Link Width */
> > Â#defineÂÂPCI_EXP_LNKCAP_ASPMSÂÂ0x00000c00 /* ASPM Support */
> > Â#defineÂÂPCI_EXP_LNKCAP_L0SELÂÂ0x00007000 /* L0s Exit Latency */
> > --
> > 2.7.4
> >