From: Martin KaFai Lau <martin.lau@linux.dev>
To: "D. Wythe" <alibuda@linux.alibaba.com>
Cc: kgraul@linux.ibm.com, wenjia@linux.ibm.com, jaka@linux.ibm.com,
ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
pabeni@redhat.com, song@kernel.org, sdf@google.com,
haoluo@google.com, yhs@fb.com, edumazet@google.com,
john.fastabend@gmail.com, kpsingh@kernel.org, jolsa@kernel.org,
guwen@linux.alibaba.com, kuba@kernel.org, davem@davemloft.net,
netdev@vger.kernel.org, linux-s390@vger.kernel.org,
linux-rdma@vger.kernel.org, bpf@vger.kernel.org
Subject: Re: [PATCH bpf-next v5 5/5] bpf/selftests: add selftest for bpf_smc_ops
Date: Tue, 7 Jan 2025 16:48:51 -0800 [thread overview]
Message-ID: <5ff5cb2b-625b-44ba-8472-95e007f24824@linux.dev> (raw)
In-Reply-To: <20250107041715.98342-6-alibuda@linux.alibaba.com>
On 1/6/25 8:17 PM, D. Wythe wrote:
> +static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, __u16 nlmsg_flags,
> + __u8 genl_cmd, __u16 nla_type,
> + void *nla_data, int nla_len)
> +{
> + struct nlattr *na;
> + struct sockaddr_nl nladdr;
> + int r, buflen;
> + char *buf;
> +
> + struct msgtemplate msg = {0};
> +
> + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
> + msg.n.nlmsg_type = nlmsg_type;
> + msg.n.nlmsg_flags = nlmsg_flags;
> + msg.n.nlmsg_seq = 0;
> + msg.n.nlmsg_pid = nlmsg_pid;
> + msg.g.cmd = genl_cmd;
> + msg.g.version = 1;
> + na = (struct nlattr *) GENLMSG_DATA(&msg);
> + na->nla_type = nla_type;
> + na->nla_len = nla_len + 1 + NLA_HDRLEN;
> + memcpy(NLA_DATA(na), nla_data, nla_len);
> + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
> +
> + buf = (char *) &msg;
> + buflen = msg.n.nlmsg_len;
> + memset(&nladdr, 0, sizeof(nladdr));
> + nladdr.nl_family = AF_NETLINK;
> +
> + while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *) &nladdr,
> + sizeof(nladdr))) < buflen) {
> + if (r > 0) {
> + buf += r;
> + buflen -= r;
> + } else if (errno != EAGAIN)
> + return -1;
> + }
The "}" indentation is off.
I was wondering if it missed a "}" for the while loop. Turns out the "else if"
does not have braces while the "if" has. I would add braces to the "else if"
also to avoid confusion like this.
> + return 0;
> +}
> +
> +static bool get_smc_nl_family_id(void)
> +{
> + struct sockaddr_nl nl_src;
> + struct msgtemplate msg;
> + struct nlattr *nl;
> + int fd, ret;
> + pid_t pid;
> +
> + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
> + if (!ASSERT_GT(fd, 0, "nl_family socket"))
> + return false;
> +
> + pid = getpid();
> +
> + memset(&nl_src, 0, sizeof(nl_src));
> + nl_src.nl_family = AF_NETLINK;
> + nl_src.nl_pid = pid;
> +
> + ret = bind(fd, (struct sockaddr *) &nl_src, sizeof(nl_src));
> + if (!ASSERT_GE(ret, 0, "nl_family bind"))
> + goto fail;
> +
> + ret = send_cmd(fd, GENL_ID_CTRL, pid,
> + NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
> + CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
> + strlen(SMC_GENL_FAMILY_NAME));
> + if (!ASSERT_EQ(ret, 0, "nl_family query"))
> + goto fail;
> +
> + ret = recv(fd, &msg, sizeof(msg), 0);
> + if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || (ret < 0) ||
> + !NLMSG_OK((&msg.n), ret), "nl_family response"))
> + goto fail;
> +
> + nl = (struct nlattr *) GENLMSG_DATA(&msg);
> + nl = (struct nlattr *) ((char *) nl + NLA_ALIGN(nl->nla_len));
> + if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
> + goto fail;
> +
> + smc_nl_family_id = *(uint16_t *) NLA_DATA(nl);
> + close(fd);
> + return true;
> +fail:
> + close(fd);
> + return false;
> +}
> +
> +static bool smc_ueid(int op)
> +{
> + struct sockaddr_nl nl_src;
> + struct msgtemplate msg;
> + struct nlmsgerr *err;
> + char test_ueid[32];
> + int fd, ret;
> + pid_t pid;
> +
> + /* UEID required */
> + memset(test_ueid, '\x20', sizeof(test_ueid));
> + memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
> + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
> + if (!ASSERT_GT(fd, 0, "ueid socket"))
> + return false;
> +
> + pid = getpid();
> + memset(&nl_src, 0, sizeof(nl_src));
> + nl_src.nl_family = AF_NETLINK;
> + nl_src.nl_pid = pid;
> +
> + ret = bind(fd, (struct sockaddr *) &nl_src, sizeof(nl_src));
> + if (!ASSERT_GE(ret, 0, "ueid bind"))
> + goto fail;
> +
> + ret = send_cmd(fd, smc_nl_family_id, pid,
> + NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
> + (void *)test_ueid, sizeof(test_ueid));
Same. Indentation is off.
> + if (!ASSERT_EQ(ret, 0, "ueid cmd"))
> + goto fail;
> +
> + ret = recv(fd, &msg, sizeof(msg), 0);
> + if (!ASSERT_FALSE((ret < 0) || !NLMSG_OK((&msg.n), ret), "ueid response"))
> + goto fail;
> +
> + if (msg.n.nlmsg_type == NLMSG_ERROR) {
> + err = NLMSG_DATA(&msg);
> + switch (op) {
> + case SMC_NETLINK_REMOVE_UEID:
> + if (!ASSERT_FALSE((err->error && err->error != -ENOENT), "ueid remove"))
> + goto fail;
> + break;
> + case SMC_NETLINK_ADD_UEID:
> + if (!ASSERT_EQ(err->error, 0, "ueid add"))
> + goto fail;
> + break;
> + default:
> + break;
> + }
> + }
> + close(fd);
> + return true;
> +fail:
> + close(fd);
> + return false;
> +}
> +
> +static bool setup_netns(void)
> +{
> + if (!ASSERT_OK(make_netns(TEST_NS), "create net namespace"))
> + return false;
> +
> + nstoken = open_netns(TEST_NS);
Instead of make_netns and then immediately open_netns, try netns_new(TEST_NS,
true) from the test_progs.c.
> + if (!ASSERT_OK_PTR(nstoken, "open net namespace"))
> + goto fail_open_netns;
> +
> + if (!ASSERT_OK(system("ip addr add 127.0.1.0/8 dev lo"), "add server node"))
> + goto fail_ip;
> +
> + if (!ASSERT_OK(system("ip addr add 127.0.2.0/8 dev lo"), "server via risk path"))
> + goto fail_ip;
> +
> + return true;
> +fail_open_netns:
> + remove_netns(TEST_NS);
> +fail_ip:
> + close_netns(nstoken);
> + return false;
> +}
> +
> +static void cleanup_netns(void)
> +{
> + close_netns(nstoken);
> + remove_netns(TEST_NS);
> +}
> +
> +static bool setup_ueid(void)
> +{
> + /* get smc nl id */
> + if (!get_smc_nl_family_id())
> + return false;
> + /* clear old ueid for bpftest */
> + smc_ueid(SMC_NETLINK_REMOVE_UEID);
> + /* smc-loopback required ueid */
> + return smc_ueid(SMC_NETLINK_ADD_UEID);
> +}
> +
> +static void cleanup_ueid(void)
> +{
> + smc_ueid(SMC_NETLINK_REMOVE_UEID);
> +}
> +
> +static bool setup_smc(void)
> +{
> + if (!setup_ueid())
> + return false;
> +
> + if (!setup_netns())
> + goto fail_netns;
> +
> + return true;
> +fail_netns:
> + cleanup_ueid();
> + return false;
> +}
> +
> +static int set_client_addr_cb(int fd, void *opts)
> +{
> + const char *src = (const char *)opts;
> + struct sockaddr_in localaddr;
> +
> + localaddr.sin_family = AF_INET;
> + localaddr.sin_port = htons(0);
> + localaddr.sin_addr.s_addr = inet_addr(src);
> + return !ASSERT_EQ(bind(fd, &localaddr, sizeof(localaddr)), 0, "client bind");
> +}
> +
> +static void run_link(const char *src, const char *dst, int port)
> +{
> + struct network_helper_opts opts = {0};
> + int server, client;
> +
> + server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
> + if (!ASSERT_OK_FD(server, "start service_1"))
> + return;
> +
> + opts.proto = IPPROTO_TCP;
> + opts.post_socket_cb = set_client_addr_cb;
> + opts.cb_opts = (void *)src;
> +
> + client = connect_to_fd_opts(server, &opts);
> + if (!ASSERT_OK_FD(client, "start connect"))
> + goto fail_client;
> +
> + close(client);
> +fail_client:
> + close(server);
> +}
> +
> +static void block_link(int map_fd, const char *src, const char *dst)
> +{
> + struct smc_strat_ip_value val = { .mode = /* block */ 0 };
> + struct smc_strat_ip_key key = {
> + .sip = inet_addr(src),
> + .dip = inet_addr(dst),
> + };
> +
> + bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
> +}
> +
> +/*
> + * This test describes a real-life service topology as follows:
> + *
> + * +-------------> service_1
> + * link1 | |
> + * +--------------------> server | link 2
> + * | | V
> + * | +-------------> service_2
> + * | link 3
> + * client -------------------> server_via_unsafe_path -> service_3
> + *
> + * Among them,
> + * 1. link-1 is very suitable for using SMC.
> + * 2. link-2 is not suitable for using SMC, because the mode of this link is kind of
> + * short-link services.
> + * 3. link-3 is also not suitable for using SMC, because the RDMA link is unavailable and
> + * needs to go through a long timeout before it can fallback to TCP.
> + *
> + * To achieve this goal, we use a customized SMC ip strategy via smc_ops.
> + */
> +static void test_topo(void)
> +{
> + struct bpf_smc *skel;
> + int rc, map_fd;
> +
> + skel = bpf_smc__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
> + return;
> +
> + rc = bpf_smc__attach(skel);
> + if (!ASSERT_EQ(rc, 0, "bpf_smc__attach"))
> + goto fail;
> +
> + map_fd = bpf_map__fd(skel->maps.smc_strats_ip);
> + if (!ASSERT_GT(map_fd, 0, "bpf_map__fd"))
> + goto fail;
> +
> + /* Mock the process of transparent replacement, since we will modify protocol
> + * to ipproto_smc accropding to it via fmod_ret/update_socket_protocol.
> + */
> + system("sysctl -w net.smc.ops=linkcheck");
> +
> + /* Configure ip strat */
> + block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
> + block_link(map_fd, SERVER_IP, SERVER_IP);
> + close(map_fd);
No need to close(map-fd) here. bpf_smc__destroy(skel) will do it.
It seems the new selftest fails also. not always though which is concerning.
pw-bot: cr
> +
> + /* should go with smc */
> + run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
> + /* should go with smc fallback */
> + run_link(SERVER_IP, SERVER_IP, SERVICE_2);
> +
> + ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
> + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
> +
> + /* should go with smc */
> + run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
> +
> + ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
> + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
> +
> + /* should go with smc fallback */
> + run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
> +
> + ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
> + ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
> +
> +fail:
> + bpf_smc__destroy(skel);
> +}
next prev parent reply other threads:[~2025-01-08 0:49 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-07 4:17 [PATCH bpf-next v5 0/5] net/smc: Introduce smc_ops D. Wythe
2025-01-07 4:17 ` [PATCH bpf-next v5 1/5] bpf: export necessary sympols for modules with struct_ops D. Wythe
2025-01-07 4:17 ` [PATCH bpf-next v5 2/5] net/smc: Introduce generic hook smc_ops D. Wythe
2025-01-13 11:40 ` Dust Li
2025-01-14 9:42 ` D. Wythe
2025-01-13 11:49 ` Dust Li
2025-01-14 9:51 ` D. Wythe
2025-01-07 4:17 ` [PATCH bpf-next v5 3/5] net/smc: bpf: register smc_ops info struct_ops D. Wythe
2025-01-07 4:17 ` [PATCH bpf-next v5 4/5] libbpf: fix error when st-prefix_ops and ops from differ btf D. Wythe
2025-01-07 4:17 ` [PATCH bpf-next v5 5/5] bpf/selftests: add selftest for bpf_smc_ops D. Wythe
2025-01-08 0:48 ` Martin KaFai Lau [this message]
2025-01-08 13:56 ` D. Wythe
2025-01-07 5:19 ` [PATCH bpf-next v5 0/5] net/smc: Introduce smc_ops D. Wythe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5ff5cb2b-625b-44ba-8472-95e007f24824@linux.dev \
--to=martin.lau@linux.dev \
--cc=alibuda@linux.alibaba.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=guwen@linux.alibaba.com \
--cc=haoluo@google.com \
--cc=jaka@linux.ibm.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kgraul@linux.ibm.com \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@google.com \
--cc=song@kernel.org \
--cc=wenjia@linux.ibm.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).