RE: [PATCH bpf-next v4 2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata
From: Song, Yoong Siang
Date: Thu Jan 09 2025 - 02:09:26 EST
On Wednesday, January 8, 2025 12:58 AM, Stanislav Fomichev <stfomichev@xxxxxxxxx> wrote:
>On 01/06, Song Yoong Siang wrote:
>> Add a Launch Time HW offload request to xdp_hw_metadata. Users can configure
>> the delta between the launch time and the HW RX-time with the "-l" argument.
>> The default delta is 100,000,000 nanoseconds.
>>
>> Signed-off-by: Song Yoong Siang <yoong.siang.song@xxxxxxxxx>
>> ---
>> tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
>> 1 file changed, 27 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> index 6f7b15d6c6ed..795c1d14e02d 100644
>> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> @@ -13,6 +13,7 @@
>> * - UDP 9091 packets trigger TX reply
>> * - TX HW timestamp is requested and reported back upon completion
>> * - TX checksum is requested
>> + * - TX launch time HW offload is requested for transmission
>> */
>>
>> #include <test_progs.h>
>> @@ -64,6 +65,8 @@ int rxq;
>> bool skip_tx;
>> __u64 last_hw_rx_timestamp;
>> __u64 last_xdp_rx_timestamp;
>> +__u64 last_launch_time;
>> +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
>>
>> void test__fail(void) { /* for network_helpers.c */ }
>>
>> @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
>> if (meta->completion.tx_timestamp) {
>> __u64 ref_tstamp = gettime(clock_id);
>>
>> + print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
>> + last_launch_time, meta->completion.tx_timestamp);
>> print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
>> meta->completion.tx_timestamp, ref_tstamp);
>> print_tstamp_delta("XDP RX-time", "User TX-complete-time",
>> @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>> xsk, ntohs(udph->check), ntohs(want_csum),
>> meta->request.csum_start, meta->request.csum_offset);
>>
>> + /* Set the value of launch time */
>> + meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
>> + meta->request.launch_time = last_hw_rx_timestamp +
>> + launch_time_delta_to_hw_rx_timestamp;
>> + last_launch_time = meta->request.launch_time;
>> + print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
>> + meta->request.launch_time);
>> +
>> memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
>> tx_desc->options |= XDP_TX_METADATA;
>> tx_desc->len = len;
>> @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>> xsk_ring_prod__submit(&xsk->tx, 1);
>> }
>>
>> +#define SLEEP_PER_ITERATION_IN_US 10
>> +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
>> +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
>> static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
>> {
>> const struct xdp_desc *rx_desc;
>> struct pollfd fds[rxq + 1];
>> + int max_iterations;
>> __u64 comp_addr;
>> __u64 addr;
>> __u32 idx = 0;
>> @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>> fds[i].revents = 0;
>> }
>>
>> + /* Calculate max iterations to wait for transmit completion */
>> + max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
>> +
>> fds[rxq].fd = server_fd;
>> fds[rxq].events = POLLIN;
>> fds[rxq].revents = 0;
>> @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>> if (ret)
>> printf("kick_tx ret=%d\n", ret);
>>
>
>[..]
>
>> - for (int j = 0; j < 500; j++) {
>> + for (int j = 0; j < max_iterations; j++) {
>> if (complete_tx(xsk, clock_id))
>> break;
>> - usleep(10);
>> + usleep(SLEEP_PER_ITERATION_IN_US);
>
>nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this
>to the following?
>
>static u64 now(void)
>{
> clock_gettime(...);
> return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
>}
>
>/* wait 5 seconds + cover launch time */
>deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
>while (true) {
> if (complete_tx())
> break;
> if (now() >= deadline)
> break;
> usleep(10);
>}
>
>It is a bit more readable than converting the time to wait into a number
>of iterations.
I agree that your code is more readable.
I will use your suggestion in the next version.
Thanks & Regards
Siang