XDP hardware hints discussion mail archive
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Stanislav Fomichev <sdf@google.com>
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	song@kernel.org, yhs@fb.com, john.fastabend@gmail.com,
	kpsingh@kernel.org, haoluo@google.com, jolsa@kernel.org,
	David Ahern <dsahern@gmail.com>, Jakub Kicinski <kuba@kernel.org>,
	Willem de Bruijn <willemb@google.com>,
	Jesper Dangaard Brouer <brouer@redhat.com>,
	Anatoly Burakov <anatoly.burakov@intel.com>,
	Alexander Lobakin <alexandr.lobakin@intel.com>,
	Magnus Karlsson <magnus.karlsson@gmail.com>,
	Maryam Tahhan <mtahhan@redhat.com>,
	xdp-hints@xdp-project.net, netdev@vger.kernel.org,
	bpf@vger.kernel.org
Subject: [xdp-hints] Re: [RFC bpf-next v2 06/14] xdp: Carry over xdp metadata into skb context
Date: Tue, 8 Nov 2022 20:19:13 -0800	[thread overview]
Message-ID: <6f57370f-7ec3-07dd-54df-04423cab6d1f@linux.dev> (raw)
In-Reply-To: <9a8fefe4-2fcb-95b7-cda0-06509feee78e@linux.dev>

On 11/8/22 7:07 PM, Martin KaFai Lau wrote:
> On 11/8/22 1:54 PM, Stanislav Fomichev wrote:
>> On Mon, Nov 7, 2022 at 2:02 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>>
>>> On 11/3/22 8:25 PM, Stanislav Fomichev wrote:
>>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>>>> index 59c9fd55699d..dba857f212d7 100644
>>>> --- a/include/linux/skbuff.h
>>>> +++ b/include/linux/skbuff.h
>>>> @@ -4217,9 +4217,13 @@ static inline bool skb_metadata_differs(const struct 
>>>> sk_buff *skb_a,
>>>>               true : __skb_metadata_differs(skb_a, skb_b, len_a);
>>>>    }
>>>>
>>>> +void skb_metadata_import_from_xdp(struct sk_buff *skb, size_t len);
>>>> +
>>>>    static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len)
>>>>    {
>>>>        skb_shinfo(skb)->meta_len = meta_len;
>>>> +     if (meta_len)
>>>> +             skb_metadata_import_from_xdp(skb, meta_len);
>>>>    }
>>>>
>>> [ ... ]
>>>
>>>> +struct xdp_to_skb_metadata {
>>>> +     u32 magic; /* xdp_metadata_magic */
>>>> +     u64 rx_timestamp;
>>>> +} __randomize_layout;
>>>> +
>>>> +struct bpf_patch;
>>>> +
>>>
>>> [ ... ]
>>>
>>>> +void skb_metadata_import_from_xdp(struct sk_buff *skb, size_t len)
>>>> +{
>>>> +     struct xdp_to_skb_metadata *meta = (void *)(skb_mac_header(skb) - len);
>>>> +
>>>> +     /* Optional SKB info, currently missing:
>>>> +      * - HW checksum info           (skb->ip_summed)
>>>> +      * - HW RX hash                 (skb_set_hash)
>>>> +      * - RX ring dev queue index    (skb_record_rx_queue)
>>>> +      */
>>>> +
>>>> +     if (len != sizeof(struct xdp_to_skb_metadata))
>>>> +             return;
>>>> +
>>>> +     if (meta->magic != xdp_metadata_magic)
>>>> +             return;
>>>> +
>>>> +     if (meta->rx_timestamp) {
>>>> +             *skb_hwtstamps(skb) = (struct skb_shared_hwtstamps){
>>>> +                     .hwtstamp = ns_to_ktime(meta->rx_timestamp),
>>>> +             };
>>>> +     }
>>>> +}
>>>
>>> Considering the metadata will affect the gro, should the meta be cleared after
>>> importing to the skb?
>>
>> Yeah, good suggestion, will clear it here.
>>
>>> [ ... ]
>>>
>>>> +/* Since we're not actually doing a call but instead rewriting
>>>> + * in place, we can only afford to use R0-R5 scratch registers.
>>>> + *
>>>> + * We reserve R1 for bpf_xdp_metadata_export_to_skb and let individual
>>>> + * metadata kfuncs use only R0,R4-R5.
>>>> + *
>>>> + * The above also means we _cannot_ easily call any other helper/kfunc
>>>> + * because there is no place for us to preserve our R1 argument;
>>>> + * existing R6-R9 belong to the callee.
>>>> + */
>>>> +void xdp_metadata_export_to_skb(const struct bpf_prog *prog, struct 
>>>> bpf_patch *patch)
>>>> +{
>>>> +     u32 func_id;
>>>> +
>>>> +     /*
>>>> +      * The code below generates the following:
>>>> +      *
>>>> +      * void bpf_xdp_metadata_export_to_skb(struct xdp_md *ctx)
>>>> +      * {
>>>> +      *      struct xdp_to_skb_metadata *meta;
>>>> +      *      int ret;
>>>> +      *
>>>> +      *      ret = bpf_xdp_adjust_meta(ctx, -sizeof(*meta));
>>>> +      *      if (!ret)
>>>> +      *              return;
>>>> +      *
>>>> +      *      meta = ctx->data_meta;
>>>> +      *      meta->magic = xdp_metadata_magic;
>>>> +      *      meta->rx_timestamp = bpf_xdp_metadata_rx_timestamp(ctx);
>>>> +      * }
>>>> +      *
>>>> +      */
>>>> +
>>>> +     bpf_patch_append(patch,
>>>> +             /* r2 = ((struct xdp_buff *)r1)->data_meta; */
>>>> +             BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
>>>> +                         offsetof(struct xdp_buff, data_meta)),
>>>> +             /* r3 = ((struct xdp_buff *)r1)->data; */
>>>> +             BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
>>>> +                         offsetof(struct xdp_buff, data)),
>>>> +             /* if (data_meta != data) return;
>>>> +              *
>>>> +              *      data_meta > data: xdp_data_meta_unsupported()
>>>> +              *      data_meta < data: already used, no need to touch
>>>> +              */
>>>> +             BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, S16_MAX),
>>>> +
>>>> +             /* r2 -= sizeof(struct xdp_to_skb_metadata); */
>>>> +             BPF_ALU64_IMM(BPF_SUB, BPF_REG_2,
>>>> +                           sizeof(struct xdp_to_skb_metadata)),
>>>> +             /* r3 = ((struct xdp_buff *)r1)->data_hard_start; */
>>>> +             BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
>>>> +                         offsetof(struct xdp_buff, data_hard_start)),
>>>> +             /* r3 += sizeof(struct xdp_frame) */
>>>> +             BPF_ALU64_IMM(BPF_ADD, BPF_REG_3,
>>>> +                           sizeof(struct xdp_frame)),
>>>> +             /* if (data-sizeof(struct xdp_to_skb_metadata) < 
>>>> data_hard_start+sizeof(struct xdp_frame)) return; */
>>>> +             BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, S16_MAX),
>>>> +
>>>> +             /* ((struct xdp_buff *)r1)->data_meta = r2; */
>>>> +             BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
>>>> +                         offsetof(struct xdp_buff, data_meta)),
>>>> +
>>>> +             /* *((struct xdp_to_skb_metadata *)r2)->magic = 
>>>> xdp_metadata_magic; */
>>>> +             BPF_ST_MEM(BPF_W, BPF_REG_2,
>>>> +                        offsetof(struct xdp_to_skb_metadata, magic),
>>>> +                        xdp_metadata_magic),
>>>> +     );
>>>> +
>>>> +     /*      r0 = bpf_xdp_metadata_rx_timestamp(ctx); */
>>>> +     func_id = xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP);
>>>> +     prog->aux->xdp_kfunc_ndo->ndo_unroll_kfunc(prog, func_id, patch);
>>>> +
>>>> +     bpf_patch_append(patch,
>>>> +             /* r2 = ((struct xdp_buff *)r1)->data_meta; */
>>>> +             BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
>>>> +                         offsetof(struct xdp_buff, data_meta)),
>>>> +             /* *((struct xdp_to_skb_metadata *)r2)->rx_timestamp = r0; */
>>>> +             BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0,
>>>> +                         offsetof(struct xdp_to_skb_metadata, rx_timestamp)),
>>>
>>> Can the xdp prog still change the metadata through xdp->data_meta? tbh, I am not
>>> sure it is solid enough to rely on asking the xdp prog not to use the same random
>>> number in its own metadata + not to change the metadata through xdp->data_meta
>>> after calling bpf_xdp_metadata_export_to_skb().
>>
>> What do you think the usecase here might be? Or are you suggesting we
>> reject further access to data_meta after
>> bpf_xdp_metadata_export_to_skb somehow?
>>
>> If we want to let the programs override some of this
>> bpf_xdp_metadata_export_to_skb() metadata, it feels like we can add
>> more kfuncs instead of exposing the layout?
>>
>> bpf_xdp_metadata_export_to_skb(ctx);
>> bpf_xdp_metadata_export_skb_hash(ctx, 1234);

After re-reading patch 6, I have another question, about the 'void 
bpf_xdp_metadata_export_to_skb();' function signature.  Should it at least 
return ok/err, or even return a 'struct xdp_to_skb_metadata *' pointer so that 
the xdp prog can directly read (or even write) it?  A related question: why does 
'struct xdp_to_skb_metadata' need __randomize_layout?


> 
> 
> I can't think of a use case now for the xdp prog to use the xdp_to_skb_metadata 
> while the xdp prog can directly call the kfunc (e.g. 
> bpf_xdp_metadata_rx_timestamp) to get an individual hint.  I was asking whether 
> patch 7 is an actual use case, since it does test the tstamp in XDP_PASS, or 
> whether it is mostly for selftest purposes.  Yeah, maybe the xdp prog will be 
> able to change the xdp_to_skb_metadata eventually but that is for later.
> 
> My concern is that the xdp prog is allowed to change xdp_to_skb_metadata, or 
> may by coincidence write metadata that matches both the random magic and the 
> sizeof(struct xdp_to_skb_metadata).
> 
> Also, the added opacity of xdp_to_skb_metadata (__randomize_layout + random int) 
> is trying very hard to hide it from the xdp prog.  Instead, would it be cleaner 
> to have a flag in xdp->flags (to be set by bpf_xdp_metadata_export_to_skb?) to 
> guard this, like one of Jesper's patches?  The xdp_convert_ctx_access() and 
> bpf_xdp_adjust_meta() can check this bit to ensure the xdp_to_skb_metadata 
> cannot be read and no metadata can be added/deleted after that.  btw, is it 
> possible to keep both xdp_to_skb_metadata and the xdp_prog's metadata?  After 
> skb_metadata_import_from_xdp pops the xdp_to_skb_metadata, the remaining 
> xdp_prog's metadata can still be used by the bpf-tc.
> 
>> ...
>>
>>> Does xdp_to_skb_metadata have a use case for XDP_PASS (like patch 7) or the
>>> xdp_to_skb_metadata can be limited to XDP_REDIRECT only?
>>
>> XDP_PASS cases where we convert xdp_buff into skb in the drivers right
>> now usually have C code to manually pull out the metadata (out of hw
>> desc) and put it into skb.
>>
>> So, currently, if we're calling bpf_xdp_metadata_export_to_skb() for
>> XDP_PASS, we're doing a double amount of work:
>> skb_metadata_import_from_xdp first, then custom driver code second.
>>
>> In theory, maybe we should completely skip drivers custom parsing when
>> there is a prog with BPF_F_XDP_HAS_METADATA?
>> Then both xdp->skb paths (XDP_PASS+XDP_REDIRECT) will be bpf-driven
>> and won't require any mental work (plus, the drivers won't have to
>> care either in the future).
>> WDYT?
> 
> 
> Yeah, not sure if it can solely depend on BPF_F_XDP_HAS_METADATA but it makes 
> sense to only use the hints (if ever written) from xdp prog especially if it 
> will eventually support xdp prog changing some of the hints in the future.  For 
> now, I think either way is fine since they are the same and the xdp prog is sort 
> of doing extra unnecessary work anyway by calling 
> bpf_xdp_metadata_export_to_skb() with XDP_PASS and knowing nothing can be 
> changed now.
> 
> 
>>
>>>> +     );
>>>> +
>>>> +     bpf_patch_resolve_jmp(patch);
>>>> +}
>>>> +
>>>>    static int __init xdp_metadata_init(void)
>>>>    {
>>>> +     xdp_metadata_magic = get_random_u32() | 1;
>>>>        return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, 
>>>> &xdp_metadata_kfunc_set);
>>>>    }
>>>>    late_initcall(xdp_metadata_init);
> 


  reply	other threads:[~2022-11-09  4:19 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-04  3:25 [xdp-hints] [RFC bpf-next v2 00/14] xdp: hints via kfuncs Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 01/14] bpf: Introduce bpf_patch Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 02/14] bpf: Support inlined/unrolled kfuncs for xdp metadata Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 03/14] veth: Introduce veth_xdp_buff wrapper for xdp_buff Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 04/14] veth: Support rx timestamp metadata for xdp Stanislav Fomichev
2022-11-09 11:21   ` [xdp-hints] " Toke Høiland-Jørgensen
2022-11-09 21:34     ` Stanislav Fomichev
2022-11-10  0:25   ` John Fastabend
2022-11-10  1:02     ` Stanislav Fomichev
2022-11-10  1:35       ` John Fastabend
2022-11-10  6:44         ` Stanislav Fomichev
2022-11-10 17:39           ` John Fastabend
2022-11-10 18:52             ` Stanislav Fomichev
2022-11-11 10:41             ` Jesper Dangaard Brouer
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 05/14] selftests/bpf: Verify xdp_metadata xdp->af_xdp path Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 06/14] xdp: Carry over xdp metadata into skb context Stanislav Fomichev
2022-11-07 22:01   ` [xdp-hints] " Martin KaFai Lau
2022-11-08 21:54     ` Stanislav Fomichev
2022-11-09  3:07       ` Martin KaFai Lau
2022-11-09  4:19         ` Martin KaFai Lau [this message]
2022-11-09 11:10           ` Toke Høiland-Jørgensen
2022-11-09 18:22             ` Martin KaFai Lau
2022-11-09 21:33               ` Stanislav Fomichev
2022-11-10  0:13                 ` Martin KaFai Lau
2022-11-10  1:02                   ` Stanislav Fomichev
2022-11-10 14:26                     ` Toke Høiland-Jørgensen
2022-11-10 18:52                       ` Stanislav Fomichev
2022-11-10 23:14                         ` Toke Høiland-Jørgensen
2022-11-10 23:52                           ` Stanislav Fomichev
2022-11-11  0:10                             ` Toke Høiland-Jørgensen
2022-11-11  0:45                               ` Martin KaFai Lau
2022-11-11  9:37                                 ` Toke Høiland-Jørgensen
2022-11-11  0:33                             ` Martin KaFai Lau
2022-11-11  0:57                               ` Stanislav Fomichev
2022-11-11  1:26                                 ` Martin KaFai Lau
2022-11-11  9:41                                   ` Toke Høiland-Jørgensen
2022-11-10 23:58                         ` Martin KaFai Lau
2022-11-11  0:20                           ` Stanislav Fomichev
2022-11-10 14:19               ` Toke Høiland-Jørgensen
2022-11-10 19:04                 ` Martin KaFai Lau
2022-11-10 23:29                   ` Toke Høiland-Jørgensen
2022-11-11  1:39                     ` Martin KaFai Lau
2022-11-11  9:44                       ` Toke Høiland-Jørgensen
2022-11-10  1:26             ` John Fastabend
2022-11-10 14:32               ` Toke Høiland-Jørgensen
2022-11-10 17:30                 ` John Fastabend
2022-11-10 22:49                   ` Toke Høiland-Jørgensen
2022-11-10  1:09   ` John Fastabend
2022-11-10  6:44     ` Stanislav Fomichev
2022-11-10 21:21       ` David Ahern
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 07/14] selftests/bpf: Verify xdp_metadata xdp->skb path Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 08/14] bpf: Helper to simplify calling kernel routines from unrolled kfuncs Stanislav Fomichev
2022-11-05  0:40   ` [xdp-hints] " Alexei Starovoitov
2022-11-05  2:18     ` Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 09/14] ice: Introduce ice_xdp_buff wrapper for xdp_buff Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 10/14] ice: Support rx timestamp metadata for xdp Stanislav Fomichev
2022-11-04 14:35   ` [xdp-hints] " Alexander Lobakin
2022-11-04 18:21     ` Stanislav Fomichev
2022-11-07 17:11       ` Alexander Lobakin
2022-11-07 19:10         ` Stanislav Fomichev
2022-12-15 11:54   ` Larysa Zaremba
2022-12-15 14:29     ` Toke Høiland-Jørgensen
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 11/14] mlx4: Introduce mlx4_xdp_buff wrapper for xdp_buff Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 12/14] mxl4: Support rx timestamp metadata for xdp Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 13/14] bnxt: Introduce bnxt_xdp_buff wrapper for xdp_buff Stanislav Fomichev
2022-11-04  3:25 ` [xdp-hints] [RFC bpf-next v2 14/14] bnxt: Support rx timestamp metadata for xdp Stanislav Fomichev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://lists.xdp-project.net/postorius/lists/xdp-hints.xdp-project.net/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6f57370f-7ec3-07dd-54df-04423cab6d1f@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=alexandr.lobakin@intel.com \
    --cc=anatoly.burakov@intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=brouer@redhat.com \
    --cc=daniel@iogearbox.net \
    --cc=dsahern@gmail.com \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=magnus.karlsson@gmail.com \
    --cc=mtahhan@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    --cc=willemb@google.com \
    --cc=xdp-hints@xdp-project.net \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox