Re: [PATCH net-next 08/11] net: hns3: add interrupt affinity support for misc interrupt

From: Saeed Mahameed
Date: Wed Jul 24 2019 - 16:36:06 EST


On Wed, 2019-07-24 at 11:18 +0800, Huazhong Tan wrote:
> From: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
>
> The misc interrupt is used to schedule the reset and mailbox
> subtask, and a 1 sec timer is used to schedule the service
> subtask, which does periodic work.
>
> This patch sets the above three subtask's affinity using the
> misc interrupt' affinity.
>
> Also this patch setups a affinity notify for misc interrupt to
> allow user to change the above three subtask's affinity.
>
> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
> Signed-off-by: Peng Li <lipeng321@xxxxxxxxxx>
> Signed-off-by: Huazhong Tan <tanhuazhong@xxxxxxxxxx>
> ---
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 59
> ++++++++++++++++++++--
> .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 4 ++
> 2 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> index f345095..fe45986 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
> @@ -1270,6 +1270,12 @@ static int hclge_configure(struct hclge_dev
> *hdev)
>
> hclge_init_kdump_kernel_config(hdev);
>
> + /* Set the init affinity based on pci func number */
> + i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev-
> >dev)));
> + i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
> + cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev-
> >pdev->dev)),
> + &hdev->affinity_mask);
> +
> return ret;
> }
>
> @@ -2502,14 +2508,16 @@ static void hclge_mbx_task_schedule(struct
> hclge_dev *hdev)
> {
> if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev-
> >state))
> - schedule_work(&hdev->mbx_service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask),
> system_wq,
> + &hdev->mbx_service_task);
> }
>
> static void hclge_reset_task_schedule(struct hclge_dev *hdev)
> {
> if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev-
> >state))
> - schedule_work(&hdev->rst_service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask),
> system_wq,
> + &hdev->rst_service_task);
> }
>
> static void hclge_task_schedule(struct hclge_dev *hdev)
> @@ -2517,7 +2525,8 @@ static void hclge_task_schedule(struct
> hclge_dev *hdev)
> if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
> !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
> !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
> - (void)schedule_work(&hdev->service_task);
> + queue_work_on(cpumask_first(&hdev->affinity_mask),
> system_wq,
> + &hdev->service_task);
> }
>
> static int hclge_get_mac_link_status(struct hclge_dev *hdev)
> @@ -2921,6 +2930,39 @@ static void hclge_get_misc_vector(struct
> hclge_dev *hdev)
> hdev->num_msi_used += 1;
> }
>
> +static void hclge_irq_affinity_notify(struct irq_affinity_notify
> *notify,
> + const cpumask_t *mask)
> +{
> + struct hclge_dev *hdev = container_of(notify, struct hclge_dev,
> + affinity_notify);
> +
> + cpumask_copy(&hdev->affinity_mask, mask);
> + del_timer_sync(&hdev->service_timer);
> + hdev->service_timer.expires = jiffies + HZ;
> + add_timer_on(&hdev->service_timer, cpumask_first(&hdev-
> >affinity_mask));
> +}

I don't see any relation between your misc irq vector and &hdev-
>service_timer, to me this looks like an abuse of the irq affinity API
to allow the user to move the service timer affinity.

> +
> +static void hclge_irq_affinity_release(struct kref *ref)
> +{
> +}
> +
> +static void hclge_misc_affinity_setup(struct hclge_dev *hdev)
> +{
> + irq_set_affinity_hint(hdev->misc_vector.vector_irq,
> + &hdev->affinity_mask);
> +
> + hdev->affinity_notify.notify = hclge_irq_affinity_notify;
> + hdev->affinity_notify.release = hclge_irq_affinity_release;
> + irq_set_affinity_notifier(hdev->misc_vector.vector_irq,
> + &hdev->affinity_notify);
> +}
> +
> +static void hclge_misc_affinity_teardown(struct hclge_dev *hdev)
> +{
> + irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL);
> + irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL);
> +}
> +
> static int hclge_misc_irq_init(struct hclge_dev *hdev)
> {
> int ret;
> @@ -6151,7 +6193,10 @@ static void hclge_set_timer_task(struct
> hnae3_handle *handle, bool enable)
> struct hclge_dev *hdev = vport->back;
>
> if (enable) {
> - mod_timer(&hdev->service_timer, jiffies + HZ);
> + del_timer_sync(&hdev->service_timer);
> + hdev->service_timer.expires = jiffies + HZ;
> + add_timer_on(&hdev->service_timer,
> + cpumask_first(&hdev->affinity_mask));
> } else {
> del_timer_sync(&hdev->service_timer);
> cancel_work_sync(&hdev->service_task);
> @@ -8809,6 +8854,11 @@ static int hclge_init_ae_dev(struct
> hnae3_ae_dev *ae_dev)
> INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
> INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);
>
> + /* Setup affinity after service timer setup because
> add_timer_on
> + * is called in affinity notify.
> + */
> + hclge_misc_affinity_setup(hdev);
> +
> hclge_clear_all_event_cause(hdev);
> hclge_clear_resetting_state(hdev);
>
> @@ -8970,6 +9020,7 @@ static void hclge_uninit_ae_dev(struct
> hnae3_ae_dev *ae_dev)
> struct hclge_dev *hdev = ae_dev->priv;
> struct hclge_mac *mac = &hdev->hw.mac;
>
> + hclge_misc_affinity_teardown(hdev);
> hclge_state_uninit(hdev);
>
> if (mac->phydev)
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> index 6a12285..14df23c 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
> @@ -864,6 +864,10 @@ struct hclge_dev {
>
> DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
> HCLGE_MAC_TNL_LOG_SIZE);
> +
> + /* affinity mask and notify for misc interrupt */
> + cpumask_t affinity_mask;
> + struct irq_affinity_notify affinity_notify;
> };
>
> /* VPort level vlan tag configuration for TX direction */