XDP hardware hints discussion mail archive
 help / color / mirror / Atom feed
* [xdp-hints] [PATCH bpf-next v4 2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata
@ 2025-01-06 13:56 Song Yoong Siang
  2025-01-07 16:57 ` [xdp-hints] " Stanislav Fomichev
  0 siblings, 1 reply; 3+ messages in thread
From: Song Yoong Siang @ 2025-01-06 13:56 UTC (permalink / raw)
  To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Willem de Bruijn, Florian Bezdeka, Donald Hunter,
	Jonathan Corbet, Bjorn Topel, Magnus Karlsson,
	Maciej Fijalkowski, Jonathan Lemon, Andrew Lunn,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend, Joe Damato, Stanislav Fomichev, Xuan Zhuo,
	Mina Almasry, Daniel Jurgens, Song Yoong Siang, Amritha Nambiar,
	Andrii Nakryiko, Eduard Zingerman, Mykola Lysenko,
	Martin KaFai Lau, Song Liu, Yonghong Song, KP Singh, Hao Luo,
	Jiri Olsa, Shuah Khan, Alexandre Torgue, Jose Abreu,
	Maxime Coquelin, Tony Nguyen, Przemek Kitszel
  Cc: netdev, linux-kernel, linux-doc, bpf, linux-kselftest,
	linux-stm32, linux-arm-kernel, intel-wired-lan, xdp-hints

Add Launch Time hw offload request to xdp_hw_metadata. Users can configure
the delta of launch time to HW RX-time by using the "-l" argument. The
default delta is 100,000,000 nanoseconds.

Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f7b15d6c6ed..795c1d14e02d 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -13,6 +13,7 @@
  * - UDP 9091 packets trigger TX reply
  * - TX HW timestamp is requested and reported back upon completion
  * - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
  */
 
 #include <test_progs.h>
@@ -64,6 +65,8 @@ int rxq;
 bool skip_tx;
 __u64 last_hw_rx_timestamp;
 __u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
 
 void test__fail(void) { /* for network_helpers.c */ }
 
@@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
 	if (meta->completion.tx_timestamp) {
 		__u64 ref_tstamp = gettime(clock_id);
 
+		print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
+				   last_launch_time, meta->completion.tx_timestamp);
 		print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
 				   meta->completion.tx_timestamp, ref_tstamp);
 		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
@@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
 	       xsk, ntohs(udph->check), ntohs(want_csum),
 	       meta->request.csum_start, meta->request.csum_offset);
 
+	/* Set the value of launch time */
+	meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+	meta->request.launch_time = last_hw_rx_timestamp +
+				    launch_time_delta_to_hw_rx_timestamp;
+	last_launch_time = meta->request.launch_time;
+	print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
+			   meta->request.launch_time);
+
 	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
 	tx_desc->options |= XDP_TX_METADATA;
 	tx_desc->len = len;
@@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
 	xsk_ring_prod__submit(&xsk->tx, 1);
 }
 
+#define SLEEP_PER_ITERATION_IN_US 10
+#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
+#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
 {
 	const struct xdp_desc *rx_desc;
 	struct pollfd fds[rxq + 1];
+	int max_iterations;
 	__u64 comp_addr;
 	__u64 addr;
 	__u32 idx = 0;
@@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 		fds[i].revents = 0;
 	}
 
+	/* Calculate max iterations to wait for transmit completion */
+	max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
+
 	fds[rxq].fd = server_fd;
 	fds[rxq].events = POLLIN;
 	fds[rxq].revents = 0;
@@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 					if (ret)
 						printf("kick_tx ret=%d\n", ret);
 
-					for (int j = 0; j < 500; j++) {
+					for (int j = 0; j < max_iterations; j++) {
 						if (complete_tx(xsk, clock_id))
 							break;
-						usleep(10);
+						usleep(SLEEP_PER_ITERATION_IN_US);
 					}
 				}
 			}
@@ -608,6 +628,7 @@ static void print_usage(void)
 		"  -h    Display this help and exit\n\n"
 		"  -m    Enable multi-buffer XDP for larger MTU\n"
 		"  -r    Don't generate AF_XDP reply (rx metadata only)\n"
+		"  -l    Delta of launch time to HW RX-time in ns (default: 100,000,000ns)\n"
 		"Generate test packets on the other machine with:\n"
 		"  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
 
@@ -618,7 +639,7 @@ static void read_args(int argc, char *argv[])
 {
 	int opt;
 
-	while ((opt = getopt(argc, argv, "chmr")) != -1) {
+	while ((opt = getopt(argc, argv, "chmrl:")) != -1) {
 		switch (opt) {
 		case 'c':
 			bind_flags &= ~XDP_USE_NEED_WAKEUP;
@@ -634,6 +655,9 @@ static void read_args(int argc, char *argv[])
 		case 'r':
 			skip_tx = true;
 			break;
+		case 'l':
+			launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+			break;
 		case '?':
 			if (isprint(optopt))
 				fprintf(stderr, "Unknown option: -%c\n", optopt);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [xdp-hints] Re: [PATCH bpf-next v4 2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata
  2025-01-06 13:56 [xdp-hints] [PATCH bpf-next v4 2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata Song Yoong Siang
@ 2025-01-07 16:57 ` Stanislav Fomichev
  2025-01-09  7:08   ` Song, Yoong Siang
  0 siblings, 1 reply; 3+ messages in thread
From: Stanislav Fomichev @ 2025-01-07 16:57 UTC (permalink / raw)
  To: Song Yoong Siang
  Cc: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Willem de Bruijn, Florian Bezdeka, Donald Hunter,
	Jonathan Corbet, Bjorn Topel, Magnus Karlsson,
	Maciej Fijalkowski, Jonathan Lemon, Andrew Lunn,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend, Joe Damato, Stanislav Fomichev, Xuan Zhuo,
	Mina Almasry, Daniel Jurgens, Amritha Nambiar, Andrii Nakryiko,
	Eduard Zingerman, Mykola Lysenko, Martin KaFai Lau, Song Liu,
	Yonghong Song, KP Singh, Hao Luo, Jiri Olsa, Shuah Khan,
	Alexandre Torgue, Jose Abreu, Maxime Coquelin, Tony Nguyen,
	Przemek Kitszel, netdev, linux-kernel, linux-doc, bpf,
	linux-kselftest, linux-stm32, linux-arm-kernel, intel-wired-lan,
	xdp-hints

On 01/06, Song Yoong Siang wrote:
> Add Launch Time hw offload request to xdp_hw_metadata. User can configure
> the delta of launch time to HW RX-time by using "-l" argument. The default
> delta is 100,000,000 nanosecond.
> 
> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
> ---
>  tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
>  1 file changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> index 6f7b15d6c6ed..795c1d14e02d 100644
> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> @@ -13,6 +13,7 @@
>   * - UDP 9091 packets trigger TX reply
>   * - TX HW timestamp is requested and reported back upon completion
>   * - TX checksum is requested
> + * - TX launch time HW offload is requested for transmission
>   */
>  
>  #include <test_progs.h>
> @@ -64,6 +65,8 @@ int rxq;
>  bool skip_tx;
>  __u64 last_hw_rx_timestamp;
>  __u64 last_xdp_rx_timestamp;
> +__u64 last_launch_time;
> +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
>  
>  void test__fail(void) { /* for network_helpers.c */ }
>  
> @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
>  	if (meta->completion.tx_timestamp) {
>  		__u64 ref_tstamp = gettime(clock_id);
>  
> +		print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
> +				   last_launch_time, meta->completion.tx_timestamp);
>  		print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
>  				   meta->completion.tx_timestamp, ref_tstamp);
>  		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
> @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>  	       xsk, ntohs(udph->check), ntohs(want_csum),
>  	       meta->request.csum_start, meta->request.csum_offset);
>  
> +	/* Set the value of launch time */
> +	meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
> +	meta->request.launch_time = last_hw_rx_timestamp +
> +				    launch_time_delta_to_hw_rx_timestamp;
> +	last_launch_time = meta->request.launch_time;
> +	print_tstamp_delta("HW RX-time", "HW Launch-time", last_hw_rx_timestamp,
> +			   meta->request.launch_time);
> +
>  	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
>  	tx_desc->options |= XDP_TX_METADATA;
>  	tx_desc->len = len;
> @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
>  	xsk_ring_prod__submit(&xsk->tx, 1);
>  }
>  
> +#define SLEEP_PER_ITERATION_IN_US 10
> +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
> +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
>  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
>  {
>  	const struct xdp_desc *rx_desc;
>  	struct pollfd fds[rxq + 1];
> +	int max_iterations;
>  	__u64 comp_addr;
>  	__u64 addr;
>  	__u32 idx = 0;
> @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>  		fds[i].revents = 0;
>  	}
>  
> +	/* Calculate max iterations to wait for transmit completion */
> +	max_iterations = MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
> +
>  	fds[rxq].fd = server_fd;
>  	fds[rxq].events = POLLIN;
>  	fds[rxq].revents = 0;
> @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>  					if (ret)
>  						printf("kick_tx ret=%d\n", ret);
>  

[..]

> -					for (int j = 0; j < 500; j++) {
> +					for (int j = 0; j < max_iterations; j++) {
>  						if (complete_tx(xsk, clock_id))
>  							break;
> -						usleep(10);
> +						usleep(SLEEP_PER_ITERATION_IN_US);

nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this
to the following?

static u64 now(void)
{
	clock_gettime(...);
	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

/* wait 5 seconds + cover launch time */
deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
while (true) {
	if (complete_tx())
		break;
	if (now() >= deadline)
		break;
	usleep(10);
}

It is a bit more readable than converting the time to wait into a
number of iterations.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [xdp-hints] Re: [PATCH bpf-next v4 2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata
  2025-01-07 16:57 ` [xdp-hints] " Stanislav Fomichev
@ 2025-01-09  7:08   ` Song, Yoong Siang
  0 siblings, 0 replies; 3+ messages in thread
From: Song, Yoong Siang @ 2025-01-09  7:08 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Simon Horman, Willem de Bruijn, Bezdeka, Florian, Donald Hunter,
	Jonathan Corbet, Bjorn Topel, Karlsson, Magnus, Fijalkowski,
	Maciej, Jonathan Lemon, Andrew Lunn, Alexei Starovoitov,
	Daniel Borkmann, Jesper Dangaard Brouer, John Fastabend, Damato,
	Joe, Stanislav Fomichev, Xuan Zhuo, Mina Almasry, Daniel Jurgens,
	Amritha Nambiar, Andrii Nakryiko, Eduard Zingerman,
	Mykola Lysenko, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, Hao Luo, Jiri Olsa, Shuah Khan, Alexandre Torgue,
	Jose Abreu, Maxime Coquelin, Nguyen, Anthony L, Kitszel,
	Przemyslaw, netdev, linux-kernel, linux-doc, bpf,
	linux-kselftest, linux-stm32, linux-arm-kernel, intel-wired-lan,
	xdp-hints

On Wednesday, January 8, 2025 12:58 AM, Stanislav Fomichev <stfomichev@gmail.com> wrote:
>On 01/06, Song Yoong Siang wrote:
>> Add Launch Time hw offload request to xdp_hw_metadata. User can configure
>> the delta of launch time to HW RX-time by using "-l" argument. The default
>> delta is 100,000,000 nanosecond.
>>
>> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
>> ---
>>  tools/testing/selftests/bpf/xdp_hw_metadata.c | 30 +++++++++++++++++--
>>  1 file changed, 27 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c
>b/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> index 6f7b15d6c6ed..795c1d14e02d 100644
>> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
>> @@ -13,6 +13,7 @@
>>   * - UDP 9091 packets trigger TX reply
>>   * - TX HW timestamp is requested and reported back upon completion
>>   * - TX checksum is requested
>> + * - TX launch time HW offload is requested for transmission
>>   */
>>
>>  #include <test_progs.h>
>> @@ -64,6 +65,8 @@ int rxq;
>>  bool skip_tx;
>>  __u64 last_hw_rx_timestamp;
>>  __u64 last_xdp_rx_timestamp;
>> +__u64 last_launch_time;
>> +__u64 launch_time_delta_to_hw_rx_timestamp = 100000000; /* 0.1 second */
>>
>>  void test__fail(void) { /* for network_helpers.c */ }
>>
>> @@ -298,6 +301,8 @@ static bool complete_tx(struct xsk *xsk, clockid_t
>clock_id)
>>  	if (meta->completion.tx_timestamp) {
>>  		__u64 ref_tstamp = gettime(clock_id);
>>
>> +		print_tstamp_delta("HW Launch-time", "HW TX-complete-time",
>> +				   last_launch_time, meta-
>>completion.tx_timestamp);
>>  		print_tstamp_delta("HW TX-complete-time", "User TX-complete-
>time",
>>  				   meta->completion.tx_timestamp, ref_tstamp);
>>  		print_tstamp_delta("XDP RX-time", "User TX-complete-time",
>> @@ -395,6 +400,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet,
>clockid_t clock_id)
>>  	       xsk, ntohs(udph->check), ntohs(want_csum),
>>  	       meta->request.csum_start, meta->request.csum_offset);
>>
>> +	/* Set the value of launch time */
>> +	meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
>> +	meta->request.launch_time = last_hw_rx_timestamp +
>> +				    launch_time_delta_to_hw_rx_timestamp;
>> +	last_launch_time = meta->request.launch_time;
>> +	print_tstamp_delta("HW RX-time", "HW Launch-time",
>last_hw_rx_timestamp,
>> +			   meta->request.launch_time);
>> +
>>  	memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
>>  	tx_desc->options |= XDP_TX_METADATA;
>>  	tx_desc->len = len;
>> @@ -402,10 +415,14 @@ static void ping_pong(struct xsk *xsk, void *rx_packet,
>clockid_t clock_id)
>>  	xsk_ring_prod__submit(&xsk->tx, 1);
>>  }
>>
>> +#define SLEEP_PER_ITERATION_IN_US 10
>> +#define SLEEP_PER_ITERATION_IN_NS (SLEEP_PER_ITERATION_IN_US * 1000)
>> +#define MAX_ITERATION(x) (((x) / SLEEP_PER_ITERATION_IN_NS) + 500)
>>  static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
>clock_id)
>>  {
>>  	const struct xdp_desc *rx_desc;
>>  	struct pollfd fds[rxq + 1];
>> +	int max_iterations;
>>  	__u64 comp_addr;
>>  	__u64 addr;
>>  	__u32 idx = 0;
>> @@ -418,6 +435,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int
>server_fd, clockid_t
>>  		fds[i].revents = 0;
>>  	}
>>
>> +	/* Calculate max iterations to wait for transmit completion */
>> +	max_iterations =
>MAX_ITERATION(launch_time_delta_to_hw_rx_timestamp);
>> +
>>  	fds[rxq].fd = server_fd;
>>  	fds[rxq].events = POLLIN;
>>  	fds[rxq].revents = 0;
>> @@ -477,10 +497,10 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq,
>int server_fd, clockid_t
>>  					if (ret)
>>  						printf("kick_tx ret=%d\n", ret);
>>
>
>[..]
>
>> -					for (int j = 0; j < 500; j++) {
>> +					for (int j = 0; j < max_iterations; j++) {
>>  						if (complete_tx(xsk, clock_id))
>>  							break;
>> -						usleep(10);
>> +
>	usleep(SLEEP_PER_ITERATION_IN_US);
>
>nit: instead of doing MAX_ITERATION/max_iterations, can we simplify this
>to the following?
>
>static u64 now(void)
>{
>	clock_gettime(...);
>	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
>}
>
>/* wait 5 seconds + cover launch time */
>deadline = now() + 5 * NSEC_PER_SEC + launch_time_delta_to_hw_rx_timestamp;
>while (true) {
>	if (complete_tx())
>		break;
>	if (now() >= deadline)
>		break;
>	usleep(10);
>}
>
>It is a bit more readable than converting time to wait to the
>iterations..

I agree that your code is more readable.
I will use your suggestion in the next version.

Thanks & Regards
Siang

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-01-09  7:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-06 13:56 [xdp-hints] [PATCH bpf-next v4 2/4] selftests/bpf: Add Launch Time request to xdp_hw_metadata Song Yoong Siang
2025-01-07 16:57 ` [xdp-hints] " Stanislav Fomichev
2025-01-09  7:08   ` Song, Yoong Siang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox