RE: [Patch v4 06/12] net: mana: Define data structures for protection domain and memory registration

From: Ajay Sharma
Date: Wed Jul 13 2022 - 00:39:27 EST


Please see inline.

-----Original Message-----
From: Dexuan Cui <decui@xxxxxxxxxxxxx>
Sent: Sunday, July 10, 2022 8:29 PM
To: Long Li <longli@xxxxxxxxxxxxx>; KY Srinivasan <kys@xxxxxxxxxxxxx>; Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>; Stephen Hemminger <sthemmin@xxxxxxxxxxxxx>; Wei Liu <wei.liu@xxxxxxxxxx>; David S. Miller <davem@xxxxxxxxxxxxx>; Jakub Kicinski <kuba@xxxxxxxxxx>; Paolo Abeni <pabeni@xxxxxxxxxx>; Jason Gunthorpe <jgg@xxxxxxxx>; Leon Romanovsky <leon@xxxxxxxxxx>; edumazet@xxxxxxxxxx; shiraz.saleem@xxxxxxxxx; Ajay Sharma <sharmaajay@xxxxxxxxxxxxx>
Cc: linux-hyperv@xxxxxxxxxxxxxxx; netdev@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx
Subject: RE: [Patch v4 06/12] net: mana: Define data structures for protection domain and memory registration

> From: longli@xxxxxxxxxxxxxxxxx <longli@xxxxxxxxxxxxxxxxx>
> Sent: Wednesday, June 15, 2022 7:07 PM
>
> The MANA hardware support protection domain and memory registration
> for
s/support/supports

> diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h
> b/drivers/net/ethernet/microsoft/mana/gdma.h
> index f945755760dc..b1bec8ab5695 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma.h
> +++ b/drivers/net/ethernet/microsoft/mana/gdma.h
> @@ -27,6 +27,10 @@ enum gdma_request_type {
> GDMA_CREATE_DMA_REGION = 25,
> GDMA_DMA_REGION_ADD_PAGES = 26,
> GDMA_DESTROY_DMA_REGION = 27,
> + GDMA_CREATE_PD = 29,
> + GDMA_DESTROY_PD = 30,
> + GDMA_CREATE_MR = 31,
> + GDMA_DESTROY_MR = 32,
These are not used in this patch. They're used in the 12th patch for the first time. Can we move these to that patch?

> #define GDMA_RESOURCE_DOORBELL_PAGE 27
> @@ -59,6 +63,8 @@ enum {
> GDMA_DEVICE_MANA = 2,
> };
>
> +typedef u64 gdma_obj_handle_t;
> +
> struct gdma_resource {
> /* Protect the bitmap */
> spinlock_t lock;
> @@ -192,7 +198,7 @@ struct gdma_mem_info {
> u64 length;
>
> /* Allocated by the PF driver */
> - u64 gdma_region;
> + gdma_obj_handle_t dma_region_handle;
The old name "gdma_region" is shorter and it has "gdma"
rather than "dma".

The new name is longer. When one starts to read the code for the first time, I feel that "dma_region_handle" might be confusing as it's similar to "dma_handle" (which is the DMA address returned by dma_alloc_coherent()). "dma_region_handle" is an integer rather than a memory address.
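
For reference, the "dma_handle" being contrasted here is the device-visible address that dma_alloc_coherent() (from <linux/dma-mapping.h>) hands back, e.g. in this illustrative fragment, where "dev" stands for whatever struct device the caller already has:

	dma_addr_t dma_handle;
	void *cpu_addr;

	/* dma_handle is a bus/DMA address, not an opaque object handle */
	cpu_addr = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle, GFP_KERNEL);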

You probably use the new name because there is an "mr_handle" in the 12th patch. I prefer the old name, though the new name is also OK to me. If you decide to use the new name, it would be great if this patch could be split into two: one for the renaming only, and the other for the real changes.

> #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
> @@ -599,7 +605,7 @@ struct gdma_create_queue_req {
> u32 reserved1;
> u32 pdid;
> u32 doolbell_id;
> - u64 gdma_region;
> + gdma_obj_handle_t gdma_region;
If we decide to use the new name "dma_region_handle", should we change the field/param names in the structs and functions below as well (this may not be a complete list)?
struct mana_ib_wq
struct mana_ib_cq
mana_ib_gd_create_dma_region
mana_ib_gd_destroy_dma_region

> u32 reserved2;
> u32 queue_size;
> u32 log2_throttle_limit;
> @@ -626,6 +632,28 @@ struct gdma_disable_queue_req {
> u32 alloc_res_id_on_creation;
> }; /* HW DATA */
>
> +enum atb_page_size {
> + ATB_PAGE_SIZE_4K,
> + ATB_PAGE_SIZE_8K,
> + ATB_PAGE_SIZE_16K,
> + ATB_PAGE_SIZE_32K,
> + ATB_PAGE_SIZE_64K,
> + ATB_PAGE_SIZE_128K,
> + ATB_PAGE_SIZE_256K,
> + ATB_PAGE_SIZE_512K,
> + ATB_PAGE_SIZE_1M,
> + ATB_PAGE_SIZE_2M,
> + ATB_PAGE_SIZE_MAX,
> +};
> +
> +enum gdma_mr_access_flags {
> + GDMA_ACCESS_FLAG_LOCAL_READ = (1 << 0),
> + GDMA_ACCESS_FLAG_LOCAL_WRITE = (1 << 1),
> + GDMA_ACCESS_FLAG_REMOTE_READ = (1 << 2),
> + GDMA_ACCESS_FLAG_REMOTE_WRITE = (1 << 3),
> + GDMA_ACCESS_FLAG_REMOTE_ATOMIC = (1 << 4),
> +};
It would be better to use BIT_ULL(0), BIT_ULL(1), etc.
Agreed, updated in the new patch.
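
For reference, a minimal sketch of what the reworked flag enum could look like with the BIT_ULL() helper from <linux/bits.h> (the same treatment applies to gdma_pd_flags further down; this is not necessarily the exact text of the new patch):

enum gdma_mr_access_flags {
	GDMA_ACCESS_FLAG_LOCAL_READ    = BIT_ULL(0),
	GDMA_ACCESS_FLAG_LOCAL_WRITE   = BIT_ULL(1),
	GDMA_ACCESS_FLAG_REMOTE_READ   = BIT_ULL(2),
	GDMA_ACCESS_FLAG_REMOTE_WRITE  = BIT_ULL(3),
	GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4),
};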

> /* GDMA_CREATE_DMA_REGION */
> struct gdma_create_dma_region_req {
> struct gdma_req_hdr hdr;
> @@ -652,14 +680,14 @@ struct gdma_create_dma_region_req {
>
> struct gdma_create_dma_region_resp {
> struct gdma_resp_hdr hdr;
> - u64 gdma_region;
> + gdma_obj_handle_t dma_region_handle;
> }; /* HW DATA */
>
> /* GDMA_DMA_REGION_ADD_PAGES */
> struct gdma_dma_region_add_pages_req {
> struct gdma_req_hdr hdr;
>
> - u64 gdma_region;
> + gdma_obj_handle_t dma_region_handle;
>
> u32 page_addr_list_len;
> u32 reserved3;
> @@ -671,9 +699,114 @@ struct gdma_dma_region_add_pages_req {
> struct gdma_destroy_dma_region_req {
> struct gdma_req_hdr hdr;
>
> - u64 gdma_region;
> + gdma_obj_handle_t dma_region_handle;
> }; /* HW DATA */
>
> +enum gdma_pd_flags {
> + GDMA_PD_FLAG_ALLOW_GPA_MR = (1 << 0),
> + GDMA_PD_FLAG_ALLOW_FMR_MR = (1 << 1),
> +};
Use BIT_ULL(0), BIT_ULL(1) ?
Agreed and updated the patch

> +struct gdma_create_pd_req {
> + struct gdma_req_hdr hdr;
> + enum gdma_pd_flags flags;
> + u32 reserved;
> +};
> +
> +struct gdma_create_pd_resp {
> + struct gdma_resp_hdr hdr;
> + gdma_obj_handle_t pd_handle;
> + u32 pd_id;
> + u32 reserved;
> +};
> +
> +struct gdma_destroy_pd_req {
> + struct gdma_req_hdr hdr;
> + gdma_obj_handle_t pd_handle;
> +};
> +
> +struct gdma_destory_pd_resp {
> + struct gdma_resp_hdr hdr;
> +};
> +
> +enum gdma_mr_type {
> + /* Guest Physical Address - MRs of this type allow access
> + * to any DMA-mapped memory using bus-logical address
> + */
> + GDMA_MR_TYPE_GPA = 1,
> +
> + /* Guest Virtual Address - MRs of this type allow access
> + * to memory mapped by PTEs associated with this MR using a virtual
> + * address that is set up in the MST
> + */
> + GDMA_MR_TYPE_GVA,
> +
> + /* Fast Memory Register - Like GVA but the MR is initially put in the
> + * FREE state (as opposed to Valid), and the specified number of
> + * PTEs are reserved for future fast memory reservations.
> + */
> + GDMA_MR_TYPE_FMR,
> +};
> +
> +struct gdma_create_mr_params {
> + gdma_obj_handle_t pd_handle;
> + enum gdma_mr_type mr_type;
> + union {
> + struct {
> + gdma_obj_handle_t dma_region_handle;
> + u64 virtual_address;
> + enum gdma_mr_access_flags access_flags;
> + } gva;
Add an empty line to make it more readable?
Done.
> + struct {
> + enum gdma_mr_access_flags access_flags;
> + } gpa;
Add an empty line?

> + struct {
> + enum atb_page_size page_size;
> + u32 reserved_pte_count;
> + } fmr;
> + };
> +};

The definition of struct gdma_create_mr_params is not naturally aligned.
This can potentially cause issues.
This is a union, so the biggest element is word-aligned. Since this struct is not passed to the HW, I feel it should be fine.

According to my test, sizeof(struct gdma_create_mr_params) is 40 bytes, meaning the compiler adds two "hidden" fields:

struct gdma_create_mr_params {
        gdma_obj_handle_t pd_handle;                            // offset = 0
        enum gdma_mr_type mr_type;                              // offset = 8
+       u32 hidden_field_a;
        union {                                                 // offset = 0x10
                struct {
                        gdma_obj_handle_t dma_region_handle;    // offset = 0x10
                        u64 virtual_address;                    // offset = 0x18
                        enum gdma_mr_access_flags access_flags; // offset = 0x20
+                       u32 hidden_field_b;
                } gva;

We'll run into trouble some day if the Linux VF driver or the host PF driver adds something like __attribute__((packed)).

Can we work with the host team to improve the definition? If it's hard/impossible to change the PF driver side definition, both sides should at least explicitly define the two hidden fields as reserved fields.
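
If the PF-side definition cannot change, a sketch of what making the padding explicit might look like (the reserved_* names below are placeholders, not anything agreed with the host team):

struct gdma_create_mr_params {
        gdma_obj_handle_t pd_handle;
        enum gdma_mr_type mr_type;
        u32 reserved_1;         /* the 4-byte pad after mr_type, made explicit */
        union {
                struct {
                        gdma_obj_handle_t dma_region_handle;
                        u64 virtual_address;
                        enum gdma_mr_access_flags access_flags;
                        u32 reserved_2; /* the 4-byte pad after access_flags, made explicit */
                } gva;

                struct {
                        enum gdma_mr_access_flags access_flags;
                } gpa;

                struct {
                        enum atb_page_size page_size;
                        u32 reserved_pte_count;
                } fmr;
        };
};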

BTW, can we assume the size of "enum" is 4 bytes? I prefer using u32 explicitly when a struct is used to talk to the PF driver or the device.

If we decide to use "enum", I suggest we add BUILD_BUG_ON(sizeof(struct gdma_create_mr_params) != 40) to make sure the assumption is true.

BTW, Haiyang added "/* HW DATA */ " to other definitions, e.g. gdma_create_queue_resp. Can you please add the same comment for consistency?
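
For example, either form of the compile-time check would do (a sketch; BUILD_BUG_ON() has to live inside a function, while static_assert() from <linux/build_bug.h> works at file scope):

/* At file scope, right after the struct definition: */
static_assert(sizeof(struct gdma_create_mr_params) == 40);

/* Or inside some existing init/probe function: */
BUILD_BUG_ON(sizeof(struct gdma_create_mr_params) != 40);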

> +struct gdma_create_mr_request {
> + struct gdma_req_hdr hdr;
> + gdma_obj_handle_t pd_handle;
> + enum gdma_mr_type mr_type;
> + u32 reserved;
> +
> + union {
> + struct {
> + enum gdma_mr_access_flags access_flags;
> + } gpa;
> +
> + struct {
> + gdma_obj_handle_t dma_region_handle;
> + u64 virtual_address;
> + enum gdma_mr_access_flags access_flags;

Similarly, there is a hidden u32 field here. We should explicitly define it.

> + } gva;
Can we use the same order of "gva; gpa" used in struct gdma_create_mr_params?
Done, although it shouldn't matter in the union case.

> + struct {
> + enum atb_page_size page_size;
> + u32 reserved_pte_count;
> + } fmr;
> + };
> +};

Add BUILD_BUG_ON(sizeof(struct gdma_create_mr_request) != 80) ?
Add /* HW DATA */ ?
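
Concretely: the same "/* HW DATA */" annotation on the closing brace that gdma_create_queue_resp has, plus another compile-time check next to the one suggested above, e.g. (sketch):

static_assert(sizeof(struct gdma_create_mr_request) == 80);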

> +struct gdma_create_mr_response {
> + struct gdma_resp_hdr hdr;
> + gdma_obj_handle_t mr_handle;
> + u32 lkey;
> + u32 rkey;
> +};
> +
> +struct gdma_destroy_mr_request {
> + struct gdma_req_hdr hdr;
> + gdma_obj_handle_t mr_handle;
> +};
> +
> +struct gdma_destroy_mr_response {
> + struct gdma_resp_hdr hdr;
> +};
> +

None of the new defines are really used in this patch:

+enum atb_page_size {
+enum gdma_mr_access_flags {
+enum gdma_pd_flags {
+struct gdma_create_pd_req {
+struct gdma_create_pd_resp {
+struct gdma_destroy_pd_req {
+struct gdma_destory_pd_resp {
+enum gdma_mr_type {
+struct gdma_create_mr_params {
+struct gdma_create_mr_request {
+struct gdma_create_mr_response {
+struct gdma_destroy_mr_request {
+struct gdma_destroy_mr_response

The new defines are used in the 12th patch for the first time.
Can we move these to that patch or at least move these defines to before the 12th patch?