Re: [PATCH v2][RESEND] perf, x86: Fix multi-segment problem of perf_event_intel_uncore

From: Peter Zijlstra
Date: Tue Sep 01 2015 - 07:15:47 EST


On Thu, Aug 27, 2015 at 07:54:32PM +0900, Taku Izumi wrote:
> This patch fixes ths problem by introducing segment-aware pci2phy_map instead.
>
> v1 -> v2:
> - Extract method named uncore_pcibus_to_physid to avoid repetetion of
> retrieving phys_id code

So close and yet so far...

> + raw_spin_lock(&pci2phy_map_lock);
> + list_for_each_entry(map, &pci2phy_map_head, list) {
> + if (map->segment == segment) {
> + found = true;
> + break;
> + }
> + }
> + if (!found) {
> + map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
> + if (map) {
> + map->segment = segment;
> + map->pbus_to_physid[bus] = 0;
> + list_add_tail(&map->list, &pci2phy_map_head);
> + }
> + } else {
> + map->pbus_to_physid[bus] = 0;
> + }
> + raw_spin_unlock(&pci2phy_map_lock);

> +
> + segment = pci_domain_nr(ubox_dev->bus);
> + raw_spin_lock(&pci2phy_map_lock);
> + list_for_each_entry(map, &pci2phy_map_head, list) {
> + if (map->segment == segment) {
> + found = true;
> break;
> }
> }
> + if (!found) {
> + map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
> + if (map) {
> + map->segment = segment;
> + list_add_tail(&map->list, &pci2phy_map_head);
> + }
> + }
> + if (map) {
> + /*
> + * every three bits in the Node ID mapping register
> + * maps to a particular node.
> + */
> + for (i = 0; i < 8; i++) {
> + if (nodeid == ((config >> (3 * i)) & 0x7)) {
> + map->pbus_to_physid[bus] = i;
> + break;
> + }
> + }
> +
> + }
> + raw_spin_unlock(&pci2phy_map_lock);
> }

Nothing there strikes you as repetitive ?

Also, no mention on the -ENOMEM handling, and you simply _CANNOT_ do a
GFP_KERNEL alloc while holding a spinlock, so I bet you didn't actually
test the patch either.

Maybe something like the below? Equally untested.

---

--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -23,8 +23,8 @@ struct event_constraint uncore_constrain

int uncore_pcibus_to_physid(struct pci_bus *bus)
{
- int phys_id = -1;
struct pci2phy_map *map;
+ int phys_id = -1;

raw_spin_lock(&pci2phy_map_lock);
list_for_each_entry(map, &pci2phy_map_head, list) {
@@ -38,6 +38,26 @@ int uncore_pcibus_to_physid(struct pci_b
return phys_id;
}

+struct __find_pci2phy(int segment)
+{
+ struct pci2phy_map *map;
+
+ lockdep_assert_held(&pci2phy_map_lock);
+
+ list_for_each_entry(map, &pci2phy_map_head, list) {
+ if (map->segment == segment)
+ return map;
+ }
+
+ map = kmalloc(sizeof(struct pci2phy_map), GFP_ATOMIC);
+ if (map) {
+ map->segment = segment;
+ list_add_tail(&map->list, &pci2phy_map_head);
+ }
+
+ return map;
+}
+
ssize_t uncore_event_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -433,22 +433,12 @@ static int snb_pci2phy_map_init(int devi
segment = pci_domain_nr(dev->bus);

raw_spin_lock(&pci2phy_map_lock);
- list_for_each_entry(map, &pci2phy_map_head, list) {
- if (map->segment == segment) {
- found = true;
- break;
- }
- }
- if (!found) {
- map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
- if (map) {
- map->segment = segment;
- map->pbus_to_physid[bus] = 0;
- list_add_tail(&map->list, &pci2phy_map_head);
- }
- } else {
- map->pbus_to_physid[bus] = 0;
+ map = __find_phy2pci(segment);
+ if (!map) {
+ raw_spin_unlock(&pci2phy_map_lock);
+ return -ENOMEM;
}
+ map->pbus_to_physid[bus] = 0;
raw_spin_unlock(&pci2phy_map_lock);

pci_dev_put(dev);
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1112,31 +1112,21 @@ static int snbep_pci2phy_map_init(int de

segment = pci_domain_nr(ubox_dev->bus);
raw_spin_lock(&pci2phy_map_lock);
- list_for_each_entry(map, &pci2phy_map_head, list) {
- if (map->segment == segment) {
- found = true;
- break;
- }
- }
- if (!found) {
- map = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
- if (map) {
- map->segment = segment;
- list_add_tail(&map->list, &pci2phy_map_head);
- }
+ map = __find_pci2phy(segment);
+ if (!map) {
+ err = -ENOMEM;
+ break;
}
- if (map) {
- /*
- * every three bits in the Node ID mapping register
- * maps to a particular node.
- */
- for (i = 0; i < 8; i++) {
- if (nodeid == ((config >> (3 * i)) & 0x7)) {
- map->pbus_to_physid[bus] = i;
- break;
- }
- }

+ /*
+ * every three bits in the Node ID mapping register
+ * maps to a particular node.
+ */
+ for (i = 0; i < 8; i++) {
+ if (nodeid == ((config >> (3 * i)) & 0x7)) {
+ map->pbus_to_physid[bus] = i;
+ break;
+ }
}
raw_spin_unlock(&pci2phy_map_lock);
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/