All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Sitnicki <jakub@cloudflare.com>
To: menglong8.dong@gmail.com
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	linux-kernel@vger.kernel.org, mengensun@tencent.com,
	flyingpeng@tencent.com, mungerjiang@tencent.com,
	Menglong Dong <imagedong@tencent.com>
Subject: Re: [PATCH bpf-next] bpf: Add document for 'dst_port' of 'struct bpf_sock'
Date: Tue, 25 Jan 2022 20:24:27 +0100	[thread overview]
Message-ID: <87sftbobys.fsf@cloudflare.com> (raw)
In-Reply-To: <20220113070245.791577-1-imagedong@tencent.com>

On Thu, Jan 13, 2022 at 08:02 AM CET, menglong8.dong@gmail.com wrote:
> From: Menglong Dong <imagedong@tencent.com>
>
> The description of 'dst_port' in 'struct bpf_sock' is not accurated.
> In fact, 'dst_port' is not in network byte order, it is 'partly' in
> network byte order.
>
> We can see it in bpf_sock_convert_ctx_access():
>
>> case offsetof(struct bpf_sock, dst_port):
>> 	*insn++ = BPF_LDX_MEM(
>> 		BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
>> 		si->dst_reg, si->src_reg,
>> 		bpf_target_off(struct sock_common, skc_dport,
>> 			       sizeof_field(struct sock_common,
>> 					    skc_dport),
>> 			       target_size));
>
> It simply passes 'sock_common->skc_dport' to 'bpf_sock->dst_port',
> which makes that the low 16-bits of 'dst_port' is equal to 'skc_port'
> and is in network byte order, but the high 16-bites of 'dst_port' is
> 0. And the actual port is 'bpf_ntohs((__u16)dst_port)', and
> 'bpf_ntohl(dst_port)' is totally not the right port.
>
> This is different form 'remote_port' in 'struct bpf_sock_ops' or
> 'struct __sk_buff':
>
>> case offsetof(struct __sk_buff, remote_port):
>> 	BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
>>
>> 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
>> 			      si->dst_reg, si->src_reg,
>> 				      offsetof(struct sk_buff, sk));
>> 	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
>> 			      bpf_target_off(struct sock_common,
>> 					     skc_dport,
>> 					     2, target_size));
>> #ifndef __BIG_ENDIAN_BITFIELD
>> 	*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
>> #endif
>
> We can see that it will left move 16-bits in little endian, which makes
> the whole 'remote_port' is in network byte order, and the actual port
> is bpf_ntohl(remote_port).
>
> Note this in the document of 'dst_port'. ( Maybe this should be unified
> in the code? )
>
> Signed-off-by: Menglong Dong <imagedong@tencent.com>
> ---
>  include/uapi/linux/bpf.h | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b0383d371b9a..891a182a749a 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5500,7 +5500,11 @@ struct bpf_sock {
>  	__u32 src_ip4;
>  	__u32 src_ip6[4];
>  	__u32 src_port;		/* host byte order */
> -	__u32 dst_port;		/* network byte order */
> +	__u32 dst_port;		/* low 16-bits are in network byte order,
> +				 * and high 16-bits are filled by 0.
> +				 * So the real port in host byte order is
> +				 * bpf_ntohs((__u16)dst_port).
> +				 */
>  	__u32 dst_ip4;
>  	__u32 dst_ip6[4];
>  	__u32 state;

I'm probably missing something obvious, but is there anything stopping
us from splitting the field, so that dst_ports is 16-bit wide?

I gave a quick check to the change below and it seems to pass verifier
checks and sock_field tests.

IDK, just an idea. Didn't give it a deeper thought.

--8<--

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4a2f7041ebae..344d62ccafba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5574,7 +5574,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__u16 unused;
+	__u16 dst_port;		/* network byte order */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
diff --git a/net/core/filter.c b/net/core/filter.c
index a06931c27eeb..c56b8ba82de5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8276,7 +8276,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 	case offsetof(struct bpf_sock, family):
 	case offsetof(struct bpf_sock, type):
 	case offsetof(struct bpf_sock, protocol):
-	case offsetof(struct bpf_sock, dst_port):
 	case offsetof(struct bpf_sock, src_port):
 	case offsetof(struct bpf_sock, rx_queue_mapping):
 	case bpf_ctx_range(struct bpf_sock, src_ip4):
@@ -8285,6 +8284,9 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
 		bpf_ctx_record_field_size(info, size_default);
 		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	case offsetof(struct bpf_sock, dst_port):
+		bpf_ctx_record_field_size(info, sizeof(__u16));
+		return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16));
 	}

 	return size == size_default;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4a2f7041ebae..344d62ccafba 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5574,7 +5574,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__u16 unused;
+	__u16 dst_port;		/* network byte order */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;

  parent reply	other threads:[~2022-01-25 19:24 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-13  7:02 [PATCH bpf-next] bpf: Add document for 'dst_port' of 'struct bpf_sock' menglong8.dong
2022-01-13 18:55 ` Song Liu
2022-01-19 22:03 ` Alexei Starovoitov
2022-01-20  3:02   ` Menglong Dong
2022-01-20  4:17     ` Alexei Starovoitov
2022-01-20 14:14       ` Menglong Dong
2022-01-21  5:17         ` Alexei Starovoitov
2022-01-25  0:35           ` Martin KaFai Lau
2022-01-25  1:03             ` Alexei Starovoitov
2022-01-25  1:16               ` Martin KaFai Lau
2022-01-25  3:09             ` Menglong Dong
2022-01-25 19:24 ` Jakub Sitnicki [this message]
2022-01-25 22:45   ` Martin KaFai Lau
2022-01-25 23:02     ` Alexei Starovoitov
2022-01-25 23:53       ` Martin KaFai Lau
2022-01-27 17:31         ` Jakub Sitnicki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87sftbobys.fsf@cloudflare.com \
    --to=jakub@cloudflare.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=flyingpeng@tencent.com \
    --cc=imagedong@tencent.com \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mengensun@tencent.com \
    --cc=menglong8.dong@gmail.com \
    --cc=mungerjiang@tencent.com \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.