netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jiri Pirko <jiri@resnulli.us>
To: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: davem@davemloft.net, xiyou.wangcong@gmail.com,
	eric.dumazet@gmail.com, netdev@vger.kernel.org
Subject: Re: [PATCH net-next v8 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
Date: Tue, 25 Apr 2017 14:13:38 +0200	[thread overview]
Message-ID: <20170425121338.GC1867@nanopsycho.orion> (raw)
In-Reply-To: <1493121247-11863-3-git-send-email-jhs@emojatatu.com>

Tue, Apr 25, 2017 at 01:54:06PM CEST, jhs@mojatatu.com wrote:
>From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>When you dump hundreds of thousands of actions, getting only 32 per
>dump batch even when the socket buffer and memory allocations allow
>is inefficient.
>
>With this change, the user will get as many as possibly fitting
>within the given constraints available to the kernel.
>
>The top level action TLV space is extended. An attribute
>TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
>is set by the user indicating the user is capable of processing
>these large dumps. Older user space which doesnt set this flag
>doesnt get the large (than 32) batches.
>The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
>actions are put in a single batch. As such user space app knows how long
>to iterate (independent of the type of action being dumped)
>instead of hardcoded maximum of 32.
>
>Some results dumping 1.5M actions, first unpatched tc which the
>kernel doesnt help:
>
>prompt$ time -p tc actions ls action gact | grep index | wc -l
>1500000
>real 1388.43
>user 2.07
>sys 1386.79
>
>Now lets see a patched tc which sets the correct flags when requesting
>a dump:
>
>prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
>1500000
>real 178.13
>user 2.02
>sys 176.96
>
>That is about 8x performance improvement for tc which sets its
>receive buffer to about 32K.
>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/uapi/linux/rtnetlink.h | 22 ++++++++++++++--
> net/sched/act_api.c            | 59 +++++++++++++++++++++++++++++++++++-------
> 2 files changed, 69 insertions(+), 12 deletions(-)
>
>diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>index cce0613..5875467 100644
>--- a/include/uapi/linux/rtnetlink.h
>+++ b/include/uapi/linux/rtnetlink.h
>@@ -674,10 +674,28 @@ struct tcamsg {
> 	unsigned char	tca__pad1;
> 	unsigned short	tca__pad2;
> };
>+
>+enum {
>+	TCA_ROOT_UNSPEC,
>+	TCA_ROOT_TAB,
>+#define TCA_ACT_TAB TCA_ROOT_TAB
>+	TCA_ROOT_FLAGS,
>+	TCA_ROOT_COUNT,
>+	__TCA_ROOT_MAX,
>+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
>+};
>+
> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>-#define TCAA_MAX 1
>+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
>+ *
>+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>+ * actions in a dump. All dump responses will contain the number of actions
>+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
>+ *
>+ */
>+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)

BIT (I think I mentioned this before)

>+#define VALID_TCA_FLAGS TCA_FLAG_LARGE_DUMP_ON

Again, namespace please. "TCA_ROOT_FLAGS_VALID"
Why is this UAPI?


> 
> /* New extended info filters for IFLA_EXT_MASK */
> #define RTEXT_FILTER_VF		(1 << 0)
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 9ce22b7..2756213 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			   struct netlink_callback *cb)
> {
> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>+	u32 act_flags = cb->args[2];
> 	struct nlattr *nest;
> 
> 	spin_lock_bh(&hinfo->lock);
>@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			}
> 			nla_nest_end(skb, nest);
> 			n_i++;
>-			if (n_i >= TCA_ACT_MAX_PRIO)
>+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
>+			    n_i >= TCA_ACT_MAX_PRIO)
> 				goto done;
> 		}
> 	}
> done:
> 	spin_unlock_bh(&hinfo->lock);
>-	if (n_i)
>+	if (n_i) {
> 		cb->args[0] += n_i;
>+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
>+			cb->args[1] = n_i;
>+	}
> 	return n_i;
> 
> nla_put_failure:
>@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
> 	return tcf_add_notify(net, n, &actions, portid);
> }
> 
>+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
>+	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
>+};
>+
> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 			 struct netlink_ext_ack *extack)
> {
> 	struct net *net = sock_net(skb->sk);
>-	struct nlattr *tca[TCAA_MAX + 1];
>+	struct nlattr *tca[TCA_ROOT_MAX + 1];
> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
> 	int ret = 0, ovr = 0;
> 
>@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	    !netlink_capable(skb, CAP_NET_ADMIN))
> 		return -EPERM;
> 
>-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
>+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
> 			  extack);
> 	if (ret < 0)
> 		return ret;
>@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	return ret;
> }
> 
>-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>+static struct nlattr *find_dump_kind(struct nlattr **nla)
> {
> 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
> 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
>-	struct nlattr *nla[TCAA_MAX + 1];
> 	struct nlattr *kind;
> 
>-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
>-			NULL, NULL) < 0)
>-		return NULL;
> 	tb1 = nla[TCA_ACT_TAB];
> 	if (tb1 == NULL)
> 		return NULL;
>@@ -1073,6 +1078,16 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
> 	return kind;
> }
> 
>+static inline bool tca_flags_valid(u32 act_flags)
>+{
>+	u32 invalid_flags_mask  = ~VALID_TCA_FLAGS;
>+
>+	if (act_flags & invalid_flags_mask)
>+		return false;

I don't see how this resolves anything. VALID_TCA_FLAGS is set in stone
not going to change anytime in future, right? Then the TCA_ROOT_FLAGS
attr will always contain only one flag, right?
Then, I don't see why do we need this dance... u8 flag attr resolves
this in elegant way. I know I sound like a broken record. This is the
last time I'm commenting on this. I give up.


>+
>+	return true;
>+}
>+
> static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> {
> 	struct net *net = sock_net(skb->sk);
>@@ -1082,8 +1097,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	struct tc_action_ops *a_o;
> 	int ret = 0;
> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>-	struct nlattr *kind = find_dump_kind(cb->nlh);
>+	struct nlattr *count_attr = NULL;
>+	struct nlattr *tb[TCA_ROOT_MAX + 1];
>+	struct nlattr *kind = NULL;
>+	u32 act_flags = 0;
>+	u32 act_count = 0;
>+
>+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
>+			  tcaa_policy, NULL);
>+	if (ret < 0)
>+		return ret;
> 
>+	kind = find_dump_kind(tb);
> 	if (kind == NULL) {
> 		pr_info("tc_dump_action: action bad kind\n");
> 		return 0;
>@@ -1093,14 +1118,25 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (a_o == NULL)
> 		return 0;
> 
>+	if (tb[TCA_ROOT_FLAGS])
>+		act_flags = nla_get_u32(tb[TCA_ROOT_FLAGS]);
>+
>+	if (act_flags && !tca_flags_valid(act_flags))
>+		return -EINVAL;
>+
> 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> 			cb->nlh->nlmsg_type, sizeof(*t), 0);
> 	if (!nlh)
> 		goto out_module_put;
>+
>+	cb->args[2] = act_flags;
> 	t = nlmsg_data(nlh);
> 	t->tca_family = AF_UNSPEC;
> 	t->tca__pad1 = 0;
> 	t->tca__pad2 = 0;
>+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
>+	if (!count_attr)
>+		goto out_module_put;
> 
> 	nest = nla_nest_start(skb, TCA_ACT_TAB);
> 	if (nest == NULL)
>@@ -1113,6 +1149,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (ret > 0) {
> 		nla_nest_end(skb, nest);
> 		ret = skb->len;
>+		act_count = cb->args[1];
>+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
>+		cb->args[1] = 0;
> 	} else
> 		nlmsg_trim(skb, b);
> 
>-- 
>1.9.1
>

  reply	other threads:[~2017-04-25 12:13 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-25 11:54 [PATCH net-next v8 0/3] net sched actions: improve dump performance Jamal Hadi Salim
2017-04-25 11:54 ` [PATCH net-next v8 1/3] net sched actions: Use proper root attribute table for actions Jamal Hadi Salim
2017-04-25 18:42   ` Simon Horman
2017-04-25 11:54 ` [PATCH net-next v8 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jamal Hadi Salim
2017-04-25 12:13   ` Jiri Pirko [this message]
2017-04-25 13:01     ` Jamal Hadi Salim
2017-04-25 16:04       ` Jiri Pirko
2017-04-25 20:29         ` Jamal Hadi Salim
2017-04-26  6:19           ` Jiri Pirko
2017-04-26 11:07             ` Simon Horman
2017-04-26 13:00               ` Jamal Hadi Salim
2017-04-26 11:48             ` Jamal Hadi Salim
2017-04-26 12:08               ` Jiri Pirko
2017-04-26 13:14                 ` Jamal Hadi Salim
2017-04-26 13:56                   ` Jiri Pirko
2017-04-26 20:07                     ` Jamal Hadi Salim
2017-04-27  6:30                       ` Jiri Pirko
2017-04-28  1:22                         ` Jamal Hadi Salim
2017-04-28  7:02                           ` Jiri Pirko
2017-04-28 12:30                             ` Jamal Hadi Salim
2017-04-28 13:21                               ` Jiri Pirko
2017-04-28 13:42                                 ` Jamal Hadi Salim
2017-04-28 14:02                                   ` Jiri Pirko
2017-04-30 10:34                                 ` Jamal Hadi Salim
2017-04-26 11:02         ` Simon Horman
2017-04-26 12:46           ` Jamal Hadi Salim
2017-04-26 13:05             ` Jiri Pirko
2017-04-26 14:46               ` David Miller
2017-04-26 14:58                 ` Jiri Pirko
2017-04-28 12:21                 ` Simon Horman
2017-04-28 12:55                   ` Jamal Hadi Salim
2017-04-28 13:21                     ` Jiri Pirko
2017-04-25 11:54 ` [PATCH net-next v8 3/3] net sched actions: add time filter for action dumping Jamal Hadi Salim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170425121338.GC1867@nanopsycho.orion \
    --to=jiri@resnulli.us \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=jhs@mojatatu.com \
    --cc=netdev@vger.kernel.org \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).