Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 09/17] netfilter: add missing IS_ENABLED(CONFIG_BRIDGE_NETFILTER) checks to header-file.
From: Pablo Neira Ayuso @ 2019-08-13 18:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20190813183701.4002-1-pablo@netfilter.org>

From: Jeremy Sowden <jeremy@azazel.net>

br_netfilter.h defines inline functions that use an enum constant and
struct member that are only defined if CONFIG_BRIDGE_NETFILTER is
enabled.  Added preprocessor checks to ensure br_netfilter.h will
compile if CONFIG_BRIDGE_NETFILTER is disabled.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index ca121ed906df..33533ea852a1 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -8,12 +8,16 @@
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
 
 	if (b)
 		memset(b, 0, sizeof(*b));
 
 	return b;
+#else
+	return NULL;
+#endif
 }
 
 void nf_bridge_update_protocol(struct sk_buff *skb);
@@ -38,10 +42,14 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct net_bridge_port *port;
 
 	port = br_port_get_rcu(dev);
 	return port ? &port->br->fake_rtable : NULL;
+#else
+	return NULL;
+#endif
 }
 
 struct net_device *setup_pre_routing(struct sk_buff *skb,
-- 
2.11.0



^ permalink raw reply related

* [PATCH 06/17] netfilter: nf_tables: store data in offload context registers
From: Pablo Neira Ayuso @ 2019-08-13 18:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20190813183701.4002-1-pablo@netfilter.org>

Store immediate data into offload context register. This allows follow
up instructions to take it from the corresponding source register.

This patch is required to support for payload mangling, although other
instructions that take data from source register will benefit from this
too.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_offload.h |  1 +
 net/netfilter/nft_immediate.c             | 24 +++++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663a10e3..4977fbe7ed08 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -9,6 +9,7 @@ struct nft_offload_reg {
 	u32		len;
 	u32		base_offset;
 	u32		offset;
+	struct nft_data data;
 	struct nft_data	mask;
 };
 
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index ca2ae4b95a8d..c7f0ef73d939 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,17 +125,13 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
 	return 0;
 }
 
-static int nft_immediate_offload(struct nft_offload_ctx *ctx,
-				 struct nft_flow_rule *flow,
-				 const struct nft_expr *expr)
+static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx,
+					 struct nft_flow_rule *flow,
+					 const struct nft_immediate_expr *priv)
 {
-	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 	struct flow_action_entry *entry;
 	const struct nft_data *data;
 
-	if (priv->dreg != NFT_REG_VERDICT)
-		return -EOPNOTSUPP;
-
 	entry = &flow->rule->action.entries[ctx->num_actions++];
 
 	data = &priv->data;
@@ -153,6 +149,20 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
 	return 0;
 }
 
+static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+				 struct nft_flow_rule *flow,
+				 const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (priv->dreg == NFT_REG_VERDICT)
+		return nft_immediate_offload_verdict(ctx, flow, priv);
+
+	memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data));
+
+	return 0;
+}
+
 static const struct nft_expr_ops nft_imm_ops = {
 	.type		= &nft_imm_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
-- 
2.11.0



^ permalink raw reply related

* [PATCH 04/17] netfilter: remove unnecessary spaces
From: Pablo Neira Ayuso @ 2019-08-13 18:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20190813183701.4002-1-pablo@netfilter.org>

From: yangxingwu <xingwu.yang@gmail.com>

This patch removes extra spaces.

Signed-off-by: yangxingwu <xingwu.yang@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_gen.h  | 2 +-
 net/netfilter/ipset/ip_set_list_set.c  | 2 +-
 net/netfilter/ipvs/ip_vs_core.c        | 2 +-
 net/netfilter/ipvs/ip_vs_mh.c          | 4 ++--
 net/netfilter/ipvs/ip_vs_proto_tcp.c   | 2 +-
 net/netfilter/nf_conntrack_ftp.c       | 2 +-
 net/netfilter/nf_conntrack_proto_tcp.c | 2 +-
 net/netfilter/nfnetlink_log.c          | 4 ++--
 net/netfilter/nfnetlink_queue.c        | 4 ++--
 net/netfilter/xt_IDLETIMER.c           | 2 +-
 10 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 0feb77fa9edc..3fced06ab752 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -953,7 +953,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 		mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
 #endif
 		key = HKEY(d, h->initval, t->htable_bits);
-		n =  rcu_dereference_bh(hbucket(t, key));
+		n = rcu_dereference_bh(hbucket(t, key));
 		if (!n)
 			continue;
 		for (i = 0; i < n->pos; i++) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6f9ead6319e0..67ac50104e6f 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -288,7 +288,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	if (n &&
 	    !(SET_WITH_TIMEOUT(set) &&
 	      ip_set_timeout_expired(ext_timeout(n, set))))
-		n =  NULL;
+		n = NULL;
 
 	e = kzalloc(set->dsize, GFP_ATOMIC);
 	if (!e)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 46f06f92ab8f..8b80ab794a92 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -617,7 +617,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
 				      iph->protocol == IPPROTO_UDP) ?
 				      IP_VS_CONN_F_ONE_PACKET : 0;
-		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
+		union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
 
 		/* create a new connection entry */
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
index 94d9d349ebb0..da0280cec506 100644
--- a/net/netfilter/ipvs/ip_vs_mh.c
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
 		return 0;
 	}
 
-	table =  kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
-			 sizeof(unsigned long), GFP_KERNEL);
+	table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
+			sizeof(unsigned long), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 000d961b97e4..32b028853a7c 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -710,7 +710,7 @@ static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd
 							sizeof(tcp_timeouts));
 	if (!pd->timeout_table)
 		return -ENOMEM;
-	pd->tcp_state_table =  tcp_states;
+	pd->tcp_state_table = tcp_states;
 	return 0;
 }
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 0ecb3e289ef2..c57d2348c505 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -162,7 +162,7 @@ static int try_rfc959(const char *data, size_t dlen,
 	if (length == 0)
 		return 0;
 
-	cmd->u3.ip =  htonl((array[0] << 24) | (array[1] << 16) |
+	cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) |
 				    (array[2] << 8) | array[3]);
 	cmd->u.tcp.port = htons((array[4] << 8) | array[5]);
 	return length;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 85c1f8c213b0..1926fd56df56 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1227,7 +1227,7 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
 	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
 	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
 	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
-	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
+	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len = sizeof(struct nf_ct_tcp_flags) },
 };
 
 #define TCP_NLATTR_SIZE	( \
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6dee4f9a944c..d69e1863e536 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -651,7 +651,7 @@ nfulnl_log_packet(struct net *net,
 	/* FIXME: do we want to make the size calculation conditional based on
 	 * what is actually present?  way more branches and checks, but more
 	 * memory efficient... */
-	size =    nlmsg_total_size(sizeof(struct nfgenmsg))
+	size = nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
@@ -668,7 +668,7 @@ nfulnl_log_packet(struct net *net,
 		+ nla_total_size(sizeof(struct nfgenmsg));	/* NLMSG_DONE */
 
 	if (in && skb_mac_header_was_set(skb)) {
-		size +=   nla_total_size(skb->dev->hard_header_len)
+		size += nla_total_size(skb->dev->hard_header_len)
 			+ nla_total_size(sizeof(u_int16_t))	/* hwtype */
 			+ nla_total_size(sizeof(u_int16_t));	/* hwlen */
 	}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index b6a7ce622c72..feabdfb22920 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -394,7 +394,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	char *secdata = NULL;
 	u32 seclen = 0;
 
-	size =    nlmsg_total_size(sizeof(struct nfgenmsg))
+	size = nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
 		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */
@@ -453,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	}
 
 	if (queue->flags & NFQA_CFG_F_UID_GID) {
-		size +=  (nla_total_size(sizeof(u_int32_t))	/* uid */
+		size += (nla_total_size(sizeof(u_int32_t))	/* uid */
 			+ nla_total_size(sizeof(u_int32_t)));	/* gid */
 	}
 
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 9cec9eae556a..f56d3ed93e56 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -283,7 +283,7 @@ static int __init idletimer_tg_init(void)
 
 	idletimer_tg_kobj = &idletimer_tg_device->kobj;
 
-	err =  xt_register_target(&idletimer_tg);
+	err = xt_register_target(&idletimer_tg);
 	if (err < 0) {
 		pr_debug("couldn't register xt target\n");
 		goto out_dev;
-- 
2.11.0



^ permalink raw reply related

* [PATCH 01/17] netfilter: synproxy: rename mss synproxy_options field
From: Pablo Neira Ayuso @ 2019-08-13 18:36 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <20190813183701.4002-1-pablo@netfilter.org>

From: Fernando Fernandez Mancera <ffmancera@riseup.net>

After introduce "mss_encode" field in the synproxy_options struct the field
"mss" is a little confusing. It has been renamed to "mss_option".

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_synproxy.h | 2 +-
 net/ipv4/netfilter/ipt_SYNPROXY.c             | 4 ++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c            | 4 ++--
 net/netfilter/nf_synproxy_core.c              | 8 ++++----
 net/netfilter/nft_synproxy.c                  | 4 ++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 44513b93bd55..2f0171d24997 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -67,7 +67,7 @@ static inline struct synproxy_net *synproxy_pernet(struct net *net)
 struct synproxy_options {
 	u8				options;
 	u8				wscale;
-	u16				mss;
+	u16				mss_option;
 	u16				mss_encode;
 	u32				tsval;
 	u32				tsecr;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 0e70f3f65f6f..748dc3ce58d3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
-		opts.mss_encode = opts.mss;
-		opts.mss = info->mss;
+		opts.mss_encode = opts.mss_option;
+		opts.mss_option = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5cdb4a69d277..fd1f52a21bf1 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
-		opts.mss_encode = opts.mss;
-		opts.mss = info->mss;
+		opts.mss_encode = opts.mss_option;
+		opts.mss_option = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c769462a839e..b0930d4aba22 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -56,7 +56,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
 			switch (opcode) {
 			case TCPOPT_MSS:
 				if (opsize == TCPOLEN_MSS) {
-					opts->mss = get_unaligned_be16(ptr);
+					opts->mss_option = get_unaligned_be16(ptr);
 					opts->options |= NF_SYNPROXY_OPT_MSS;
 				}
 				break;
@@ -115,7 +115,7 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
 	if (options & NF_SYNPROXY_OPT_MSS)
 		*ptr++ = htonl((TCPOPT_MSS << 24) |
 			       (TCPOLEN_MSS << 16) |
-			       opts->mss);
+			       opts->mss_option);
 
 	if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
 		if (options & NF_SYNPROXY_OPT_SACK_PERM)
@@ -642,7 +642,7 @@ synproxy_recv_client_ack(struct net *net,
 	}
 
 	this_cpu_inc(snet->stats->cookie_valid);
-	opts->mss = mss;
+	opts->mss_option = mss;
 	opts->options |= NF_SYNPROXY_OPT_MSS;
 
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
@@ -1060,7 +1060,7 @@ synproxy_recv_client_ack_ipv6(struct net *net,
 	}
 
 	this_cpu_inc(snet->stats->cookie_valid);
-	opts->mss = mss;
+	opts->mss_option = mss;
 	opts->options |= NF_SYNPROXY_OPT_MSS;
 
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 928e661d1517..db4c23f5dfcb 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -31,8 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
 		opts->options |= NF_SYNPROXY_OPT_ECN;
 
 	opts->options &= priv->info.options;
-	opts->mss_encode = opts->mss;
-	opts->mss = info->mss;
+	opts->mss_encode = opts->mss_option;
+	opts->mss_option = info->mss;
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
 		synproxy_init_timestamp_cookie(info, opts);
 	else
-- 
2.11.0



^ permalink raw reply related

* Re: [PATCH bpf-next 2/3] xdp: xdp_umem: replace kmap on vmap for umem map
From: Jonathan Lemon @ 2019-08-13 18:33 UTC (permalink / raw)
  To: Ivan Khoronzhuk
  Cc: magnus.karlsson, bjorn.topel, davem, hawk, john.fastabend,
	jakub.kicinski, daniel, netdev, bpf, xdp-newbies, linux-kernel
In-Reply-To: <20190813183023.GA2856@khorivan>

On 13 Aug 2019, at 11:30, Ivan Khoronzhuk wrote:

> On Tue, Aug 13, 2019 at 10:42:18AM -0700, Jonathan Lemon wrote:
>>
>>
>> On 13 Aug 2019, at 3:23, Ivan Khoronzhuk wrote:
>>
>>> For 64-bit there is no reason to use vmap/vunmap, so use 
>>> page_address
>>> as it was initially. For 32 bits, in some apps, like in samples
>>> xdpsock_user.c when number of pgs in use is quite big, the kmap
>>> memory can be not enough, despite on this, kmap looks like is
>>> deprecated in such cases as it can block and should be used rather
>>> for dynamic mm.
>>>
>>> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
>>
>> Seems a bit overkill - if not high memory, kmap() falls back
>> to just page_address(), unlike vmap().
>
>> -- Jonathan
>
> So, as kmap has limitation... if I correctly understood, you propose
> to avoid macros and do smth like kmap:
>
> 	void *addr;
> 	if (!PageHighMem(&umem->pgs[i]))
> 		addr =  page_address(page);
> 	else
> 		addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
>
> 	umem->pages[i].addr = addr;
>
> and while unmap
>
> 	if (!PageHighMem(&umem->pgs[i]))
> 		vunmap(umem->pages[i].addr);
>
> I can try it, and add this in v2 if no objection.

Seems like a reasonable compromise to me.
-- 
Jonathan


>
>>
>>> ---
>>> net/xdp/xdp_umem.c | 16 ++++++++++++----
>>> 1 file changed, 12 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
>>> index a0607969f8c0..907c9019fe21 100644
>>> --- a/net/xdp/xdp_umem.c
>>> +++ b/net/xdp/xdp_umem.c
>>> @@ -14,7 +14,7 @@
>>> #include <linux/netdevice.h>
>>> #include <linux/rtnetlink.h>
>>> #include <linux/idr.h>
>>> -#include <linux/highmem.h>
>>> +#include <linux/vmalloc.h>
>>>
>>> #include "xdp_umem.h"
>>> #include "xsk_queue.h"
>>> @@ -167,10 +167,12 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
>>>
>>> static void xdp_umem_unmap_pages(struct xdp_umem *umem)
>>> {
>>> +#if BITS_PER_LONG == 32
>>> 	unsigned int i;
>>>
>>> 	for (i = 0; i < umem->npgs; i++)
>>> -		kunmap(umem->pgs[i]);
>>> +		vunmap(umem->pages[i].addr);
>>> +#endif
>>> }
>>>
>>> static void xdp_umem_unpin_pages(struct xdp_umem *umem)
>>> @@ -378,8 +380,14 @@ static int xdp_umem_reg(struct xdp_umem *umem, 
>>> struct xdp_umem_reg *mr)
>>> 		goto out_account;
>>> 	}
>>>
>>> -	for (i = 0; i < umem->npgs; i++)
>>> -		umem->pages[i].addr = kmap(umem->pgs[i]);
>>> +	for (i = 0; i < umem->npgs; i++) {
>>> +#if BITS_PER_LONG == 32
>>> +		umem->pages[i].addr = vmap(&umem->pgs[i], 1, VM_MAP,
>>> +					   PAGE_KERNEL);
>>> +#else
>>> +		umem->pages[i].addr = page_address(umem->pgs[i]);
>>> +#endif
>>> +	}
>>>
>>> 	return 0;
>>>
>>> -- 
>>> 2.17.1
>
> -- 
> Regards,
> Ivan Khoronzhuk

^ permalink raw reply

* Re: [PATCH bpf-next 2/3] xdp: xdp_umem: replace kmap on vmap for umem map
From: Ivan Khoronzhuk @ 2019-08-13 18:30 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: magnus.karlsson, bjorn.topel, davem, hawk, john.fastabend,
	jakub.kicinski, daniel, netdev, bpf, xdp-newbies, linux-kernel
In-Reply-To: <9F98648A-8654-4767-97B5-CF4BC939393C@flugsvamp.com>

On Tue, Aug 13, 2019 at 10:42:18AM -0700, Jonathan Lemon wrote:
>
>
>On 13 Aug 2019, at 3:23, Ivan Khoronzhuk wrote:
>
>>For 64-bit there is no reason to use vmap/vunmap, so use page_address
>>as it was initially. For 32 bits, in some apps, like in samples
>>xdpsock_user.c when number of pgs in use is quite big, the kmap
>>memory can be not enough, despite on this, kmap looks like is
>>deprecated in such cases as it can block and should be used rather
>>for dynamic mm.
>>
>>Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
>
>Seems a bit overkill - if not high memory, kmap() falls back
>to just page_address(), unlike vmap().

>-- Jonathan

So, as kmap has limitation... if I correctly understood, you propose
to avoid macros and do smth like kmap:

	void *addr;
	if (!PageHighMem(&umem->pgs[i]))
		addr =  page_address(page);
	else
		addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);

	umem->pages[i].addr = addr;

and while unmap

	if (!PageHighMem(&umem->pgs[i]))
		vunmap(umem->pages[i].addr);

I can try it, and add this in v2 if no objection.

>
>>---
>> net/xdp/xdp_umem.c | 16 ++++++++++++----
>> 1 file changed, 12 insertions(+), 4 deletions(-)
>>
>>diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
>>index a0607969f8c0..907c9019fe21 100644
>>--- a/net/xdp/xdp_umem.c
>>+++ b/net/xdp/xdp_umem.c
>>@@ -14,7 +14,7 @@
>> #include <linux/netdevice.h>
>> #include <linux/rtnetlink.h>
>> #include <linux/idr.h>
>>-#include <linux/highmem.h>
>>+#include <linux/vmalloc.h>
>>
>> #include "xdp_umem.h"
>> #include "xsk_queue.h"
>>@@ -167,10 +167,12 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
>>
>> static void xdp_umem_unmap_pages(struct xdp_umem *umem)
>> {
>>+#if BITS_PER_LONG == 32
>> 	unsigned int i;
>>
>> 	for (i = 0; i < umem->npgs; i++)
>>-		kunmap(umem->pgs[i]);
>>+		vunmap(umem->pages[i].addr);
>>+#endif
>> }
>>
>> static void xdp_umem_unpin_pages(struct xdp_umem *umem)
>>@@ -378,8 +380,14 @@ static int xdp_umem_reg(struct xdp_umem *umem, 
>>struct xdp_umem_reg *mr)
>> 		goto out_account;
>> 	}
>>
>>-	for (i = 0; i < umem->npgs; i++)
>>-		umem->pages[i].addr = kmap(umem->pgs[i]);
>>+	for (i = 0; i < umem->npgs; i++) {
>>+#if BITS_PER_LONG == 32
>>+		umem->pages[i].addr = vmap(&umem->pgs[i], 1, VM_MAP,
>>+					   PAGE_KERNEL);
>>+#else
>>+		umem->pages[i].addr = page_address(umem->pgs[i]);
>>+#endif
>>+	}
>>
>> 	return 0;
>>
>>-- 
>>2.17.1

-- 
Regards,
Ivan Khoronzhuk

^ permalink raw reply

* Re: general protection fault in tls_write_space
From: John Fastabend @ 2019-08-13 18:30 UTC (permalink / raw)
  To: Jakub Kicinski, John Fastabend
  Cc: Hillf Danton, syzbot, aviadye, borisp, daniel, davejwatson, davem,
	linux-kernel, netdev, oss-drivers, syzkaller-bugs, willemb
In-Reply-To: <20190813102705.1f312b67@cakuba.netronome.com>

Jakub Kicinski wrote:
> On Tue, 13 Aug 2019 10:17:06 -0700, John Fastabend wrote:
> > > Followup of commit 95fa145479fb
> > > ("bpf: sockmap/tls, close can race with map free")
> > > 
> > > --- a/net/tls/tls_main.c
> > > +++ b/net/tls/tls_main.c
> > > @@ -308,6 +308,9 @@ static void tls_sk_proto_close(struct so
> > >  	if (free_ctx)
> > >  		icsk->icsk_ulp_data = NULL;
> > >  	sk->sk_prot = ctx->sk_proto;
> > > +	/* tls will go; restore sock callback before enabling bh */
> > > +	if (sk->sk_write_space == tls_write_space)
> > > +		sk->sk_write_space = ctx->sk_write_space;
> > >  	write_unlock_bh(&sk->sk_callback_lock);
> > >  	release_sock(sk);
> > >  	if (ctx->tx_conf == TLS_SW)  
> > 
> > Hi Hillf,
> > 
> > We need this patch (although slightly updated for bpf tree) do
> > you want to send it? Otherwise I can. We should only set this if
> > TX path was enabled otherwise we null it. Checking against
> > tls_write_space seems best to me as well.
> > 
> > Against bpf this patch should fix it.
> > 
> > diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> > index ce6ef56a65ef..43252a801c3f 100644
> > --- a/net/tls/tls_main.c
> > +++ b/net/tls/tls_main.c
> > @@ -308,7 +308,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
> >         if (free_ctx)
> >                 icsk->icsk_ulp_data = NULL;
> >         sk->sk_prot = ctx->sk_proto;
> > -       sk->sk_write_space = ctx->sk_write_space;
> > +       if (sk->sk_write_space == tls_write_space)
> > +               sk->sk_write_space = ctx->sk_write_space;
> >         write_unlock_bh(&sk->sk_callback_lock);
> >         release_sock(sk);
> >         if (ctx->tx_conf == TLS_SW)
> 
> This is already in net since Friday:

Don't we need to guard that with an

  if (sk->sk_write_space == tls_write_space)

or something similar? Where is ctx->sk_write_space set in the rx only
case? In do_tls_setsockop_conf() we have this block

	if (tx) {
		ctx->sk_write_space = sk->sk_write_space;
		sk->sk_write_space = tls_write_space;
	} else {
		sk->sk_socket->ops = &tls_sw_proto_ops;
	}

which makes me think ctx->sk_write_space may not be set correctly in
all cases.

Thanks.

> 
> commit 57c722e932cfb82e9820bbaae1b1f7222ea97b52
> Author: Jakub Kicinski <jakub.kicinski@netronome.com>
> Date:   Fri Aug 9 18:36:23 2019 -0700
> 
>     net/tls: swap sk_write_space on close
>     
>     Now that we swap the original proto and clear the ULP pointer
>     on close we have to make sure no callback will try to access
>     the freed state. sk_write_space is not part of sk_prot, remember
>     to swap it.
>     
>     Reported-by: syzbot+dcdc9deefaec44785f32@syzkaller.appspotmail.com
>     Fixes: 95fa145479fb ("bpf: sockmap/tls, close can race with map free")
>     Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
>     Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> index 9cbbae606ced..ce6ef56a65ef 100644
> --- a/net/tls/tls_main.c
> +++ b/net/tls/tls_main.c
> @@ -308,6 +308,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
>         if (free_ctx)
>                 icsk->icsk_ulp_data = NULL;
>         sk->sk_prot = ctx->sk_proto;
> +       sk->sk_write_space = ctx->sk_write_space;
>         write_unlock_bh(&sk->sk_callback_lock);
>         release_sock(sk);
>         if (ctx->tx_conf == TLS_SW)


^ permalink raw reply

* Re: libbpf distro packaging
From: Andrii Nakryiko @ 2019-08-13 18:26 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Jiri Olsa, Julia Kartseva, labbott@redhat.com, acme@kernel.org,
	debian-kernel@lists.debian.org, netdev@vger.kernel.org,
	Andrii Nakryiko, Andrey Ignatov, Alexei Starovoitov,
	Yonghong Song, jolsa@kernel.org
In-Reply-To: <eb5cf65a-1aa0-fde4-e726-41a736cb7314@iogearbox.net>

On Tue, Aug 13, 2019 at 7:14 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 8/13/19 2:24 PM, Jiri Olsa wrote:
> > On Mon, Aug 12, 2019 at 07:04:12PM +0000, Julia Kartseva wrote:
> >> I would like to bring up libbpf publishing discussion started at [1].
> >> The present state of things is that libbpf is built from kernel tree, e.g. [2]
> >> For Debian and [3] for Fedora whereas the better way would be having a
> >> package built from github mirror. The advantages of the latter:
> >> - Consistent, ABI matching versioning across distros
> >> - The mirror has integration tests
> >> - No need in kernel tree to build a package
> >> - Changes can be merged directly to github w/o waiting them to be merged
> >> through bpf-next -> net-next -> main
> >> There is a PR introducing a libbpf.spec which can be used as a starting point: [4]
> >> Any comments regarding the spec itself can be posted there.
> >> In the future it may be used as a source of truth.
> >> Please consider switching libbpf packaging to the github mirror instead
> >> of the kernel tree.
> >> Thanks
> >>
> >> [1] https://lists.iovisor.org/g/iovisor-dev/message/1521
> >> [2] https://packages.debian.org/sid/libbpf4.19
> >> [3] http://rpmfind.net/linux/RPM/fedora/devel/rawhide/x86_64/l/libbpf-5.3.0-0.rc2.git0.1.fc31.x86_64.html
> >> [4] https://github.com/libbpf/libbpf/pull/64
> >
> > hi,
> > Fedora has libbpf as kernel-tools subpackage, so I think
> > we'd need to create new package and deprecate the current
> >
> > but I like the ABI stability by using github .. how's actually
> > the sync (in both directions) with kernel sources going on?
>
> The upstream kernel's tools/lib/bpf/ is always source of truth. Meaning, changes need
> to make it upstream first and they are later synced into the GH stand-alone repo.

As I mentioned in reply to Jiri, kernel's tools/lib/bpf are the source
of truth for the sources of libbpf itself, but Github has some extra
stuff necessary to make libbpf work/build in isolation from kernel.
Plus some administrative stuff (e.g., sync script).

So if this spec is geared towards Github layout and for use with
Github projection of libbpf, maybe it makes more sense to keep it in
Github only? Is that spec going to be useful in kernel sources? Or
will it just create more confusion on why it's there?

Plus it will make it easier to synchronize version bumping/tagging of
new release on Github.

>
> Thanks,
> Daniel

^ permalink raw reply

* Re: libbpf distro packaging
From: Andrii Nakryiko @ 2019-08-13 18:23 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Julia Kartseva, labbott@redhat.com, acme@kernel.org,
	debian-kernel@lists.debian.org, netdev@vger.kernel.org,
	Andrii Nakryiko, Andrey Ignatov, Alexei Starovoitov,
	Yonghong Song, jolsa@kernel.org
In-Reply-To: <20190813122420.GB9349@krava>

On Tue, Aug 13, 2019 at 5:26 AM Jiri Olsa <jolsa@redhat.com> wrote:
>
> On Mon, Aug 12, 2019 at 07:04:12PM +0000, Julia Kartseva wrote:
> > I would like to bring up libbpf publishing discussion started at [1].
> > The present state of things is that libbpf is built from kernel tree, e.g. [2]
> > For Debian and [3] for Fedora whereas the better way would be having a
> > package built from github mirror. The advantages of the latter:
> > - Consistent, ABI matching versioning across distros
> > - The mirror has integration tests
> > - No need in kernel tree to build a package
> > - Changes can be merged directly to github w/o waiting them to be merged
> > through bpf-next -> net-next -> main
> > There is a PR introducing a libbpf.spec which can be used as a starting point: [4]
> > Any comments regarding the spec itself can be posted there.
> > In the future it may be used as a source of truth.
> > Please consider switching libbpf packaging to the github mirror instead
> > of the kernel tree.
> > Thanks
> >
> > [1] https://lists.iovisor.org/g/iovisor-dev/message/1521
> > [2] https://packages.debian.org/sid/libbpf4.19
> > [3] http://rpmfind.net/linux/RPM/fedora/devel/rawhide/x86_64/l/libbpf-5.3.0-0.rc2.git0.1.fc31.x86_64.html
> > [4] https://github.com/libbpf/libbpf/pull/64
>
> hi,
> Fedora has libbpf as kernel-tools subpackage, so I think
> we'd need to create new package and deprecate the current
>
> but I like the ABI stability by using github .. how's actually
> the sync (in both directions) with kernel sources going on?

Sync is always in one direction, from kernel sources into Github repo.
Right now it's triggered by a human (usually me), but we are using a
script that automates entire process (see
https://github.com/libbpf/libbpf/blob/master/scripts/sync-kernel.sh).
It cherry-pick relevant commits from kernel, transforms them to match
Github's file layout and re-applies those changes to Github repo.

There is never a sync from Github back to kernel, but Github repo
contains some extra stuff that's not in kernel. E.g., the script I
mentioned, plus Github's Makefile is different, because it can't rely
on kernel's kbuild setup.

>
> thanks,
> jirka

^ permalink raw reply

* [PATCH net-next 5/5] rds: check for excessive looping in rds_send_xmit
From: Gerd Rausch @ 2019-08-13 18:21 UTC (permalink / raw)
  To: Santosh Shilimkar, netdev, linux-rdma, rds-devel; +Cc: David Miller

From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 13 Jan 2011 11:40:31 -0800

Original commit from 2011 updated to include a change by
Yuval Shaia <yuval.shaia@oracle.com>
that adds a new statistic counter "send_stuck_rm"
to capture the messages looping exessively
in the send path.

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
---
 net/rds/rds.h   |  2 +-
 net/rds/send.c  | 12 ++++++++++++
 net/rds/stats.c |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/net/rds/rds.h b/net/rds/rds.h
index f0066d168499..ad605fd61655 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -717,7 +717,7 @@ struct rds_statistics {
 	uint64_t	s_cong_send_blocked;
 	uint64_t	s_recv_bytes_added_to_socket;
 	uint64_t	s_recv_bytes_removed_from_socket;
-
+	uint64_t	s_send_stuck_rm;
 };
 
 /* af_rds.c */
diff --git a/net/rds/send.c b/net/rds/send.c
index 031b1e97a466..9ce552abf9e9 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -145,6 +145,7 @@ int rds_send_xmit(struct rds_conn_path *cp)
 	LIST_HEAD(to_be_dropped);
 	int batch_count;
 	unsigned long send_gen = 0;
+	int same_rm = 0;
 
 restart:
 	batch_count = 0;
@@ -200,6 +201,17 @@ int rds_send_xmit(struct rds_conn_path *cp)
 
 		rm = cp->cp_xmit_rm;
 
+		if (!rm) {
+			same_rm = 0;
+		} else {
+			same_rm++;
+			if (same_rm >= 4096) {
+				rds_stats_inc(s_send_stuck_rm);
+				ret = -EAGAIN;
+				break;
+			}
+		}
+
 		/*
 		 * If between sending messages, we can send a pending congestion
 		 * map update.
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 6bbab4d74c4f..9e87da43c004 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -78,6 +78,7 @@ static const char *const rds_stat_names[] = {
 	"cong_send_blocked",
 	"recv_bytes_added_to_sock",
 	"recv_bytes_freed_fromsock",
+	"send_stuck_rm",
 };
 
 void rds_stats_info_copy(struct rds_info_iterator *iter,
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 4/5] net/rds: Add a few missing rds_stat_names entries
From: Gerd Rausch @ 2019-08-13 18:21 UTC (permalink / raw)
  To: Santosh Shilimkar, netdev, linux-rdma, rds-devel; +Cc: David Miller

Date: Thu, 11 Jul 2019 12:15:50 -0700

In a previous commit, fields were added to "struct rds_statistics"
but array "rds_stat_names" was not updated accordingly.

Please note the inconsistent naming of the string representations
that is done in the name of compatibility
with the Oracle internal code-base.

s_recv_bytes_added_to_socket     -> "recv_bytes_added_to_sock"
s_recv_bytes_removed_from_socket -> "recv_bytes_freed_fromsock"

Fixes: 192a798f5299 ("RDS: add stat for socket recv memory usage")
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
---
 net/rds/stats.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/rds/stats.c b/net/rds/stats.c
index 73be187d389e..6bbab4d74c4f 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -76,6 +76,8 @@ static const char *const rds_stat_names[] = {
 	"cong_update_received",
 	"cong_send_error",
 	"cong_send_blocked",
+	"recv_bytes_added_to_sock",
+	"recv_bytes_freed_fromsock",
 };
 
 void rds_stats_info_copy(struct rds_info_iterator *iter,
-- 
2.22.0



^ permalink raw reply related

* [PATCH net-next 3/5] RDS: don't use GFP_ATOMIC for sk_alloc in rds_create
From: Gerd Rausch @ 2019-08-13 18:21 UTC (permalink / raw)
  To: Santosh Shilimkar, netdev, linux-rdma, rds-devel; +Cc: David Miller

From: Chris Mason <chris.mason@oracle.com>
Date: Fri, 3 Feb 2012 11:08:51 -0500

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Somasundaram Krishnasamy <somasundaram.krishnasamy@oracle.com>
---
 net/rds/af_rds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2b969f99ef13..7228892046cf 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -705,7 +705,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
 	if (sock->type != SOCK_SEQPACKET || protocol)
 		return -ESOCKTNOSUPPORT;
 
-	sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
+	sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
 	if (!sk)
 		return -ENOMEM;
 
-- 
2.22.0



^ permalink raw reply related

* [PATCH net-next 1/5] RDS: Re-add pf/sol access via sysctl
From: Gerd Rausch @ 2019-08-13 18:20 UTC (permalink / raw)
  To: Santosh Shilimkar, netdev, linux-rdma, rds-devel; +Cc: David Miller

From: Andy Grover <andy.grover@oracle.com>
Date: Tue, 24 Nov 2009 15:35:51 -0800

Although RDS has an official PF_RDS value now, existing software
expects to look for rds sysctls to determine it. We need to maintain
these for now, for backwards compatibility.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
---
 net/rds/sysctl.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index e381bbcd9cc1..9760292a0af4 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -49,6 +49,13 @@ unsigned int  rds_sysctl_max_unacked_bytes = (16 << 20);
 
 unsigned int rds_sysctl_ping_enable = 1;
 
+/*
+ * We have official values, but must maintain the sysctl interface for existing
+ * software that expects to find these values here.
+ */
+static int rds_sysctl_pf_rds = PF_RDS;
+static int rds_sysctl_sol_rds = SOL_RDS;
+
 static struct ctl_table rds_sysctl_rds_table[] = {
 	{
 		.procname       = "reconnect_min_delay_ms",
@@ -68,6 +75,20 @@ static struct ctl_table rds_sysctl_rds_table[] = {
 		.extra1		= &rds_sysctl_reconnect_min_jiffies,
 		.extra2		= &rds_sysctl_reconnect_max,
 	},
+	{
+		.procname       = "pf_rds",
+		.data		= &rds_sysctl_pf_rds,
+		.maxlen         = sizeof(int),
+		.mode           = 0444,
+		.proc_handler   = &proc_dointvec,
+	},
+	{
+		.procname       = "sol_rds",
+		.data		= &rds_sysctl_sol_rds,
+		.maxlen         = sizeof(int),
+		.mode           = 0444,
+		.proc_handler   = &proc_dointvec,
+	},
 	{
 		.procname	= "max_unacked_packets",
 		.data		= &rds_sysctl_max_unacked_packets,
-- 
2.22.0



^ permalink raw reply related

* [PATCH net-next 0/5] net/rds: Fixes from internal Oracle repo
From: Gerd Rausch @ 2019-08-13 18:20 UTC (permalink / raw)
  To: Santosh Shilimkar, netdev, linux-rdma, rds-devel; +Cc: David Miller

This is the first set of (mostly old) patches from our internal repository
in an effort to synchronize what Oracle had been using internally
with what is shipped with the Linux kernel.

Andy Grover (2):
  RDS: Re-add pf/sol access via sysctl
  rds: check for excessive looping in rds_send_xmit

Chris Mason (2):
  RDS: limit the number of times we loop in rds_send_xmit
  RDS: don't use GFP_ATOMIC for sk_alloc in rds_create

Gerd Rausch (1):
  net/rds: Add a few missing rds_stat_names entries

 net/rds/af_rds.c  |  2 +-
 net/rds/ib_recv.c | 12 +++++++++++-
 net/rds/rds.h     |  2 +-
 net/rds/send.c    | 12 ++++++++++++
 net/rds/stats.c   |  3 +++
 net/rds/sysctl.c  | 21 +++++++++++++++++++++
 6 files changed, 49 insertions(+), 3 deletions(-)

-- 
2.22.0


^ permalink raw reply

* [PATCH net-next 2/5] RDS: limit the number of times we loop in rds_send_xmit
From: Gerd Rausch @ 2019-08-13 18:20 UTC (permalink / raw)
  To: Santosh Shilimkar, netdev, linux-rdma, rds-devel; +Cc: David Miller

From: Chris Mason <chris.mason@oracle.com>
Date: Fri, 3 Feb 2012 11:07:54 -0500

This will kick the RDS worker thread if we have been looping
too long.

Original commit from 2012 updated to include a change by
Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
that triggers "must_wake" if "rds_ib_recv_refill_one" fails.

Signed-off-by: Gerd Rausch <gerd.rausch@oracle.com>
---
 net/rds/ib_recv.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 3cae88cbdaa0..1a8a4a760b84 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -385,6 +385,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 	unsigned int posted = 0;
 	int ret = 0;
 	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
+	bool must_wake = false;
 	u32 pos;
 
 	/* the goal here is to just make sure that someone, somewhere
@@ -405,6 +406,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 		recv = &ic->i_recvs[pos];
 		ret = rds_ib_recv_refill_one(conn, recv, gfp);
 		if (ret) {
+			must_wake = true;
 			break;
 		}
 
@@ -423,6 +425,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 		}
 
 		posted++;
+
+		if ((posted > 128 && need_resched()) || posted > 8192) {
+			must_wake = true;
+			break;
+		}
 	}
 
 	/* We're doing flow control - update the window. */
@@ -445,10 +452,13 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 	 * if we should requeue.
 	 */
 	if (rds_conn_up(conn) &&
-	    ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
+	    (must_wake ||
+	    (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
 	    rds_ib_ring_empty(&ic->i_recv_ring))) {
 		queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
 	}
+	if (can_wait)
+		cond_resched();
 }
 
 /*
-- 
2.22.0



^ permalink raw reply related

* [PATCH 3/3] net: qca: update MODULE_AUTHOR() email address
From: Stefan Wahren @ 2019-08-13 18:17 UTC (permalink / raw)
  To: Jean Delvare, Guenter Roeck, David S. Miller, Srinivas Kandagatla,
	Shawn Guo, Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team
  Cc: linux-hwmon, linux-arm-kernel, linux-kernel, netdev,
	Stefan Wahren
In-Reply-To: <1565720249-6549-1-git-send-email-wahrenst@gmx.net>

I2SE has been acquired by in-tech. So the email address listed in
MODULE_AUTHOR() will be disabled in the near future. I only have access
to QCA7000 boards at in-tech, so use my new company address.

Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
---
 drivers/net/ethernet/qualcomm/qca_7k_common.c | 2 +-
 drivers/net/ethernet/qualcomm/qca_spi.c       | 2 +-
 drivers/net/ethernet/qualcomm/qca_uart.c      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_7k_common.c b/drivers/net/ethernet/qualcomm/qca_7k_common.c
index 6b511f0..87d023d 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k_common.c
+++ b/drivers/net/ethernet/qualcomm/qca_7k_common.c
@@ -162,5 +162,5 @@ EXPORT_SYMBOL_GPL(qcafrm_fsm_decode);

 MODULE_DESCRIPTION("Qualcomm Atheros QCA7000 common");
 MODULE_AUTHOR("Qualcomm Atheros Communications");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <stefan.wahren@in-tech.com>");
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index b28360b..d9d3554 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -1034,6 +1034,6 @@ module_spi_driver(qca_spi_driver);

 MODULE_DESCRIPTION("Qualcomm Atheros QCA7000 SPI Driver");
 MODULE_AUTHOR("Qualcomm Atheros Communications");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <stefan.wahren@in-tech.com>");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(QCASPI_DRV_VERSION);
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index 5906168..ab4da8e 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -418,6 +418,6 @@ module_serdev_device_driver(qca_uart_driver);

 MODULE_DESCRIPTION("Qualcomm Atheros QCA7000 UART Driver");
 MODULE_AUTHOR("Qualcomm Atheros Communications");
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <stefan.wahren@in-tech.com>");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(QCAUART_DRV_VERSION);
--
2.7.4


^ permalink raw reply related

* [PATCH 2/3] hwmon: raspberrypi: update MODULE_AUTHOR() email address
From: Stefan Wahren @ 2019-08-13 18:17 UTC (permalink / raw)
  To: Jean Delvare, Guenter Roeck, David S. Miller, Srinivas Kandagatla,
	Shawn Guo, Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team
  Cc: linux-hwmon, linux-arm-kernel, linux-kernel, netdev,
	Stefan Wahren
In-Reply-To: <1565720249-6549-1-git-send-email-wahrenst@gmx.net>

The email address listed in MODULE_AUTHOR() will be disabled in the
near future. Replace it with my private one.

Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
---
 drivers/hwmon/raspberrypi-hwmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/raspberrypi-hwmon.c b/drivers/hwmon/raspberrypi-hwmon.c
index efe4bb1..d3a64a3 100644
--- a/drivers/hwmon/raspberrypi-hwmon.c
+++ b/drivers/hwmon/raspberrypi-hwmon.c
@@ -146,7 +146,7 @@ static struct platform_driver rpi_hwmon_driver = {
 };
 module_platform_driver(rpi_hwmon_driver);

-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <wahrenst@gmx.net>");
 MODULE_DESCRIPTION("Raspberry Pi voltage sensor driver");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:raspberrypi-hwmon");
--
2.7.4


^ permalink raw reply related

* [PATCH 1/3] nvmem: mxs-ocotp: update MODULE_AUTHOR() email address
From: Stefan Wahren @ 2019-08-13 18:17 UTC (permalink / raw)
  To: Jean Delvare, Guenter Roeck, David S. Miller, Srinivas Kandagatla,
	Shawn Guo, Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam,
	NXP Linux Team
  Cc: linux-hwmon, linux-arm-kernel, linux-kernel, netdev,
	Stefan Wahren

The email address listed in MODULE_AUTHOR() will be disabled in the
near future. Replace it with my private one.

Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
---
 drivers/nvmem/mxs-ocotp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c
index c34d9fe..8e4898d 100644
--- a/drivers/nvmem/mxs-ocotp.c
+++ b/drivers/nvmem/mxs-ocotp.c
@@ -200,6 +200,6 @@ static struct platform_driver mxs_ocotp_driver = {
 };

 module_platform_driver(mxs_ocotp_driver);
-MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
+MODULE_AUTHOR("Stefan Wahren <wahrenst@gmx.net");
 MODULE_DESCRIPTION("driver for OCOTP in i.MX23/i.MX28");
 MODULE_LICENSE("GPL v2");
--
2.7.4


^ permalink raw reply related

* Re: [RESEND][PATCH v3 bpf-next] btf: expose BTF info through sysfs
From: Andrii Nakryiko @ 2019-08-13 18:08 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Andrii Nakryiko, bpf, Networking, Alexei Starovoitov, Kernel Team
In-Reply-To: <c4103c58-941c-da3a-9abd-eecbcd256f1d@iogearbox.net>

On Tue, Aug 13, 2019 at 7:20 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 8/12/19 8:39 PM, Andrii Nakryiko wrote:
> > Make .BTF section allocated and expose its contents through sysfs.
> >
> > /sys/kernel/btf directory is created to contain all the BTFs present
> > inside kernel. Currently there is only kernel's main BTF, represented as
> > /sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported,
> > each module will expose its BTF as /sys/kernel/btf/<module-name> file.
> >
> > Current approach relies on a few pieces coming together:
> > 1. pahole is used to take almost final vmlinux image (modulo .BTF and
> >     kallsyms) and generate .BTF section by converting DWARF info into
> >     BTF. This section is not allocated and not mapped to any segment,
> >     though, so is not yet accessible from inside kernel at runtime.
> > 2. objcopy dumps .BTF contents into binary file and subsequently
> >     convert binary file into linkable object file with automatically
> >     generated symbols _binary__btf_kernel_bin_start and
> >     _binary__btf_kernel_bin_end, pointing to start and end, respectively,
> >     of BTF raw data.
> > 3. final vmlinux image is generated by linking this object file (and
> >     kallsyms, if necessary). sysfs_btf.c then creates
> >     /sys/kernel/btf/kernel file and exposes embedded BTF contents through
> >     it. This allows, e.g., libbpf and bpftool access BTF info at
> >     well-known location, without resorting to searching for vmlinux image
> >     on disk (location of which is not standardized and vmlinux image
> >     might not be even available in some scenarios, e.g., inside qemu
> >     during testing).
>
> Small question: given modules will be covered later, would it not be more
> obvious to name it /sys/kernel/btf/vmlinux instead?

vmlinux totally makes sense, not sure why I didn't think about that initially...

I'll follow up with a rename.

>
> > Alternative approach using .incbin assembler directive to embed BTF
> > contents directly was attempted but didn't work, because sysfs_proc.o is
> > not re-compiled during link-vmlinux.sh stage. This is required, though,
> > to update embedded BTF data (initially empty data is embedded, then
> > pahole generates BTF info and we need to regenerate sysfs_btf.o with
> > updated contents, but it's too late at that point).
> >
> > If BTF couldn't be generated due to missing or too old pahole,
> > sysfs_btf.c handles that gracefully by detecting that
> > _binary__btf_kernel_bin_start (weak symbol) is 0 and not creating
> > /sys/kernel/btf at all.
> >
> > v2->v3:
> > - added Documentation/ABI/testing/sysfs-kernel-btf (Greg K-H);
> > - created proper kobject (btf_kobj) for btf directory (Greg K-H);
> > - undo v2 change of reusing vmlinux, as it causes extra kallsyms pass
> >    due to initially missing  __binary__btf_kernel_bin_{start/end} symbols;
> >
> > v1->v2:
> > - allow kallsyms stage to re-use vmlinux generated by gen_btf();
> >
> > Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > Signed-off-by: Andrii Nakryiko <andriin@fb.com>
>
> In any case, this is great progress, applied thanks!

^ permalink raw reply

* Re: [RFC PATCH 4/5] PTP: Add flag for non-periodic output
From: Richard Cochran @ 2019-08-13 18:06 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: netdev, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H . Peter Anvin, x86, linux-kernel, Christopher S . Hall
In-Reply-To: <20190813174821.GC3207@localhost>

On Tue, Aug 13, 2019 at 10:48:21AM -0700, Richard Cochran wrote:
> > +		if (copy_from_user(&req.extts, (void __user *)arg,
> > +				   sizeof(req.extts))) {
> > +			err = -EFAULT;
> > +			break;
> > +		}
> > +		if (req.extts.flags || req.extts.rsv[0]
> > +				|| req.extts.rsv[1]) {
> > +			err = -EINVAL;
> 
> Since the code is mostly the same as in the PTP_EXTTS_REQUEST case,
> maybe just double up the case statements (like in the other) and add
> an extra test for (cmd == PTP_EXTTS_REQUEST2) for this if-block.

Thinking about the drivers, in the case of the legacy ioctls, let's
also be sure to clear the flags and reserved fields before passing
them to the drivers.

Thanks,
Richard

^ permalink raw reply

* Re: [RFC PATCH 0/5] PTP: add support for Intel's TGPIO controller
From: Richard Cochran @ 2019-08-13 17:49 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: Andrew Lunn, netdev, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, H . Peter Anvin, x86, linux-kernel,
	Christopher S . Hall
In-Reply-To: <87wofhy05t.fsf@gmail.com>

On Tue, Aug 13, 2019 at 10:50:06AM +0300, Felipe Balbi wrote:
> If we do that we make it difficult for those reading specification and
> trying to find the matching driver.

+1

Thanks,
Richard

^ permalink raw reply

* Re: [RFC PATCH 4/5] PTP: Add flag for non-periodic output
From: Richard Cochran @ 2019-08-13 17:48 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: netdev, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H . Peter Anvin, x86, linux-kernel, Christopher S . Hall
In-Reply-To: <87tvalxzzi.fsf@gmail.com>

On Tue, Aug 13, 2019 at 10:53:53AM +0300, Felipe Balbi wrote:
> before I send a new series built on top of this change, I thought I'd
> check with you if I'm on the right path. Below you can find my current
> take at the new IOCTLs. I maintained the same exact structures so that
> there's no maintenance burden. Also introduce a new IOCTL for every
> single one of the previously existing ones even though not all of them
> needed changes. The reason for that was just to make it easier for
> libary authors to update their library by a simple sed script adding '2'
> to the end of the IOCTL macro.

Sounds good.  I have a few comments, below...
 
> Let me know if you want anything to be changed or had a different idea
> about any of this. Also, if you prefer that I finish the entire series
> before you review, no worries either ;-)
> 
> Cheers, patch follows:
> 
> From bc2aa511d4c2e2228590fb29604c6c33b56527ad Mon Sep 17 00:00:00 2001
> From: Felipe Balbi <felipe.balbi@linux.intel.com>
> Date: Tue, 13 Aug 2019 10:32:35 +0300
> Subject: [PATCH] PTP: introduce new versions of IOCTLs
> 
> The current version of the IOCTL have a small problem which prevents
> us from extending the API by making use of reserved fields. In these
> new IOCTLs, we are now making sure that flags and rsv fields are zero
> which will allow us to extend the API in the future.
> 
> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
> ---
>  drivers/ptp/ptp_chardev.c      | 105 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/ptp_clock.h |  12 ++++
>  2 files changed, 117 insertions(+)
> 
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 18ffe449efdf..94775073527b 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -126,6 +126,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  	switch (cmd) {
>  
>  	case PTP_CLOCK_GETCAPS:
> +	case PTP_CLOCK_GETCAPS2:
>  		memset(&caps, 0, sizeof(caps));
>  		caps.max_adj = ptp->info->max_adj;
>  		caps.n_alarm = ptp->info->n_alarm;
> @@ -153,6 +154,28 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  		err = ops->enable(ops, &req, enable);
>  		break;
>  
> +	case PTP_EXTTS_REQUEST2:
> +		memset(&req, 0, sizeof(req));

This memset not needed, AFAICT.  Oh wait, you want to keep drivers
from seeing stack data in the unused parts of the union.  That is
fine, but please just do that unconditionally at the top of the
function.

> +		if (copy_from_user(&req.extts, (void __user *)arg,
> +				   sizeof(req.extts))) {
> +			err = -EFAULT;
> +			break;
> +		}
> +		if (req.extts.flags || req.extts.rsv[0]
> +				|| req.extts.rsv[1]) {
> +			err = -EINVAL;

Since the code is mostly the same as in the PTP_EXTTS_REQUEST case,
maybe just double up the case statements (like in the other) and add
an extra test for (cmd == PTP_EXTTS_REQUEST2) for this if-block.

> +			break;
> +		}
> +			
> +		if (req.extts.index >= ops->n_ext_ts) {
> +			err = -EINVAL;
> +			break;
> +		}
> +		req.type = PTP_CLK_REQ_EXTTS;
> +		enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0;
> +		err = ops->enable(ops, &req, enable);
> +		break;
> +
>  	case PTP_PEROUT_REQUEST:
>  		if (copy_from_user(&req.perout, (void __user *)arg,
>  				   sizeof(req.perout))) {
> @@ -168,6 +191,28 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  		err = ops->enable(ops, &req, enable);
>  		break;
>  
> +	case PTP_PEROUT_REQUEST2:
> +		memset(&req, 0, sizeof(req));
> +		if (copy_from_user(&req.perout, (void __user *)arg,
> +				   sizeof(req.perout))) {
> +			err = -EFAULT;
> +			break;
> +		}
> +		if (req.perout.flags || req.perout.rsv[0]
> +				|| req.perout.rsv[1] || req.perout.rsv[2]
> +				|| req.perout.rsv[3]) {
> +			err = -EINVAL;
> +			break;
> +		}

Also this could share code with the legacy ioctl.

> +		if (req.perout.index >= ops->n_per_out) {
> +			err = -EINVAL;
> +			break;
> +		}
> +		req.type = PTP_CLK_REQ_PEROUT;
> +		enable = req.perout.period.sec || req.perout.period.nsec;
> +		err = ops->enable(ops, &req, enable);
> +		break;
> +
>  	case PTP_ENABLE_PPS:
>  		if (!capable(CAP_SYS_TIME))
>  			return -EPERM;
> @@ -176,7 +221,17 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  		err = ops->enable(ops, &req, enable);
>  		break;
>  
> +	case PTP_ENABLE_PPS2:
> +		if (!capable(CAP_SYS_TIME))
> +			return -EPERM;
> +		memset(&req, 0, sizeof(req));

Clearing 'req' unconditionally will make this case the same as the
legacy case.

> +		req.type = PTP_CLK_REQ_PPS;
> +		enable = arg ? 1 : 0;
> +		err = ops->enable(ops, &req, enable);
> +		break;
> +
>  	case PTP_SYS_OFFSET_PRECISE:
> +	case PTP_SYS_OFFSET_PRECISE2:
>  		if (!ptp->info->getcrosststamp) {
>  			err = -EOPNOTSUPP;
>  			break;
> @@ -201,6 +256,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  		break;
>  
>  	case PTP_SYS_OFFSET_EXTENDED:
> +	case PTP_SYS_OFFSET_EXTENDED2:
>  		if (!ptp->info->gettimex64) {
>  			err = -EOPNOTSUPP;
>  			break;
> @@ -232,6 +288,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  		break;
>  
>  	case PTP_SYS_OFFSET:
> +	case PTP_SYS_OFFSET2:
>  		sysoff = memdup_user((void __user *)arg, sizeof(*sysoff));
>  		if (IS_ERR(sysoff)) {
>  			err = PTR_ERR(sysoff);
> @@ -284,6 +341,31 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  			err = -EFAULT;
>  		break;
>  
> +	case PTP_PIN_GETFUNC2:
> +		memset(&pd, 0, sizeof(pd));

This memset is pointless because of the following copy_from_user().

> +		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
> +			err = -EFAULT;
> +			break;
> +		}
> +		if (pd.rsv[0] || pd.rsv[1] || pd.rsv[2]
> +				|| pd.rsv[3] || pd.rsv[4]) {
> +			err = -EINVAL;
> +			break;
> +		}

Again maybe share the code?

> +		pin_index = pd.index;
> +		if (pin_index >= ops->n_pins) {
> +			err = -EINVAL;
> +			break;
> +		}
> +		pin_index = array_index_nospec(pin_index, ops->n_pins);
> +		if (mutex_lock_interruptible(&ptp->pincfg_mux))
> +			return -ERESTARTSYS;
> +		pd = ops->pin_config[pin_index];
> +		mutex_unlock(&ptp->pincfg_mux);
> +		if (!err && copy_to_user((void __user *)arg, &pd, sizeof(pd)))
> +			err = -EFAULT;
> +		break;
> +
>  	case PTP_PIN_SETFUNC:
>  		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
>  			err = -EFAULT;
> @@ -301,6 +383,29 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  		mutex_unlock(&ptp->pincfg_mux);
>  		break;
>  
> +	case PTP_PIN_SETFUNC2:
> +		memset(&pd, 0, sizeof(pd));

memset not needed here either.

> +		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
> +			err = -EFAULT;
> +			break;
> +		}
> +		if (pd.rsv[0] || pd.rsv[1] || pd.rsv[2]
> +				|| pd.rsv[3] || pd.rsv[4]) {
> +			err = -EINVAL;
> +			break;
> +		}

also shareable.

Thanks,
Richard

> +		pin_index = pd.index;
> +		if (pin_index >= ops->n_pins) {
> +			err = -EINVAL;
> +			break;
> +		}
> +		pin_index = array_index_nospec(pin_index, ops->n_pins);
> +		if (mutex_lock_interruptible(&ptp->pincfg_mux))
> +			return -ERESTARTSYS;
> +		err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan);
> +		mutex_unlock(&ptp->pincfg_mux);
> +		break;
> +
>  	default:
>  		err = -ENOTTY;
>  		break;
> diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
> index 1bc794ad957a..039cd62ec706 100644
> --- a/include/uapi/linux/ptp_clock.h
> +++ b/include/uapi/linux/ptp_clock.h
> @@ -149,6 +149,18 @@ struct ptp_pin_desc {
>  #define PTP_SYS_OFFSET_EXTENDED \
>  	_IOWR(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended)
>  
> +#define PTP_CLOCK_GETCAPS2  _IOR(PTP_CLK_MAGIC, 10, struct ptp_clock_caps)
> +#define PTP_EXTTS_REQUEST2  _IOW(PTP_CLK_MAGIC, 11, struct ptp_extts_request)
> +#define PTP_PEROUT_REQUEST2 _IOW(PTP_CLK_MAGIC, 12, struct ptp_perout_request)
> +#define PTP_ENABLE_PPS2     _IOW(PTP_CLK_MAGIC, 13, int)
> +#define PTP_SYS_OFFSET2     _IOW(PTP_CLK_MAGIC, 14, struct ptp_sys_offset)
> +#define PTP_PIN_GETFUNC2    _IOWR(PTP_CLK_MAGIC, 15, struct ptp_pin_desc)
> +#define PTP_PIN_SETFUNC2    _IOW(PTP_CLK_MAGIC, 16, struct ptp_pin_desc)
> +#define PTP_SYS_OFFSET_PRECISE2 \
> +	_IOWR(PTP_CLK_MAGIC, 17, struct ptp_sys_offset_precise)
> +#define PTP_SYS_OFFSET_EXTENDED2 \
> +	_IOWR(PTP_CLK_MAGIC, 18, struct ptp_sys_offset_extended)
> +
>  struct ptp_extts_event {
>  	struct ptp_clock_time t; /* Time event occured. */
>  	unsigned int index;      /* Which channel produced the event. */
> -- 
> 2.22.0
> 
> 
> 
> -- 
> balbi

^ permalink raw reply

* Re: [patch net-next v3 0/3] net: devlink: Finish network namespace support
From: Jakub Kicinski @ 2019-08-13 17:45 UTC (permalink / raw)
  To: David Ahern; +Cc: Jiri Pirko, netdev, davem, stephen, mlxsw
In-Reply-To: <a9fa6f7f-7981-6077-106d-fa2abfc7397c@gmail.com>

On Mon, 12 Aug 2019 19:46:57 -0600, David Ahern wrote:
> On 8/12/19 7:11 PM, Jakub Kicinski wrote:
> > If the devlink instance just disappeared - that'd be a very very strange
> > thing. Only software objects disappear with the namespace. 
> > Netdevices without ->rtnl_link_ops go back to init_net.  
> 
> netdevsim still has rtnl_link_ops:
> 
> static struct rtnl_link_ops nsim_link_ops __read_mostly = {
>         .kind           = DRV_NAME,
>         .validate       = nsim_validate,
> };

The test harness is the only devlink instance which may 
conceivably have link ops. And implementing the behaviour 
you ask for would require core changes.

We are back to the precedent by test harness argument :(

^ permalink raw reply

* [PATCH net-next] page_pool: fix logic in __page_pool_get_cached
From: Jonathan Lemon @ 2019-08-13 17:45 UTC (permalink / raw)
  To: netdev, davem
  Cc: brouer, ilias.apalodimas, saeedm, ttoukan.linux, kernel-team

__page_pool_get_cached() will return NULL when the ring is
empty, even if there are pages present in the lookaside cache.

It is also possible to refill the cache, and then return a
NULL page.

Restructure the logic so eliminate both cases.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 net/core/page_pool.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 68510eb869ea..de09a74a39a4 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -82,12 +82,9 @@ EXPORT_SYMBOL(page_pool_create);
 static struct page *__page_pool_get_cached(struct page_pool *pool)
 {
 	struct ptr_ring *r = &pool->ring;
+	bool refill = false;
 	struct page *page;
 
-	/* Quicker fallback, avoid locks when ring is empty */
-	if (__ptr_ring_empty(r))
-		return NULL;
-
 	/* Test for safe-context, caller should provide this guarantee */
 	if (likely(in_serving_softirq())) {
 		if (likely(pool->alloc.count)) {
@@ -95,27 +92,23 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 			page = pool->alloc.cache[--pool->alloc.count];
 			return page;
 		}
-		/* Slower-path: Alloc array empty, time to refill
-		 *
-		 * Open-coded bulk ptr_ring consumer.
-		 *
-		 * Discussion: the ring consumer lock is not really
-		 * needed due to the softirq/NAPI protection, but
-		 * later need the ability to reclaim pages on the
-		 * ring. Thus, keeping the locks.
-		 */
-		spin_lock(&r->consumer_lock);
-		while ((page = __ptr_ring_consume(r))) {
-			if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
-				break;
-			pool->alloc.cache[pool->alloc.count++] = page;
-		}
-		spin_unlock(&r->consumer_lock);
-		return page;
+		refill = true;
 	}
 
-	/* Slow-path: Get page from locked ring queue */
-	page = ptr_ring_consume(&pool->ring);
+	/* Quicker fallback, avoid locks when ring is empty */
+	if (__ptr_ring_empty(r))
+		return NULL;
+
+	/* Slow-path: Get page from locked ring queue,
+	 * refill alloc array if requested.
+	 */
+	spin_lock(&r->consumer_lock);
+	page = __ptr_ring_consume(r);
+	if (refill)
+		pool->alloc.count = __ptr_ring_consume_batched(r,
+							pool->alloc.cache,
+							PP_ALLOC_CACHE_REFILL);
+	spin_unlock(&r->consumer_lock);
 	return page;
 }
 
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH bpf-next 2/3] xdp: xdp_umem: replace kmap on vmap for umem map
From: Jonathan Lemon @ 2019-08-13 17:42 UTC (permalink / raw)
  To: Ivan Khoronzhuk
  Cc: magnus.karlsson, bjorn.topel, davem, hawk, john.fastabend,
	jakub.kicinski, daniel, netdev, bpf, xdp-newbies, linux-kernel
In-Reply-To: <20190813102318.5521-3-ivan.khoronzhuk@linaro.org>



On 13 Aug 2019, at 3:23, Ivan Khoronzhuk wrote:

> For 64-bit there is no reason to use vmap/vunmap, so use page_address
> as it was initially. For 32 bits, in some apps, like in samples
> xdpsock_user.c when number of pgs in use is quite big, the kmap
> memory can be not enough, despite on this, kmap looks like is
> deprecated in such cases as it can block and should be used rather
> for dynamic mm.
>
> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>

Seems a bit overkill - if not high memory, kmap() falls back
to just page_address(), unlike vmap().
-- 
Jonathan

> ---
>  net/xdp/xdp_umem.c | 16 ++++++++++++----
>  1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
> index a0607969f8c0..907c9019fe21 100644
> --- a/net/xdp/xdp_umem.c
> +++ b/net/xdp/xdp_umem.c
> @@ -14,7 +14,7 @@
>  #include <linux/netdevice.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/idr.h>
> -#include <linux/highmem.h>
> +#include <linux/vmalloc.h>
>
>  #include "xdp_umem.h"
>  #include "xsk_queue.h"
> @@ -167,10 +167,12 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
>
>  static void xdp_umem_unmap_pages(struct xdp_umem *umem)
>  {
> +#if BITS_PER_LONG == 32
>  	unsigned int i;
>
>  	for (i = 0; i < umem->npgs; i++)
> -		kunmap(umem->pgs[i]);
> +		vunmap(umem->pages[i].addr);
> +#endif
>  }
>
>  static void xdp_umem_unpin_pages(struct xdp_umem *umem)
> @@ -378,8 +380,14 @@ static int xdp_umem_reg(struct xdp_umem *umem, 
> struct xdp_umem_reg *mr)
>  		goto out_account;
>  	}
>
> -	for (i = 0; i < umem->npgs; i++)
> -		umem->pages[i].addr = kmap(umem->pgs[i]);
> +	for (i = 0; i < umem->npgs; i++) {
> +#if BITS_PER_LONG == 32
> +		umem->pages[i].addr = vmap(&umem->pgs[i], 1, VM_MAP,
> +					   PAGE_KERNEL);
> +#else
> +		umem->pages[i].addr = page_address(umem->pgs[i]);
> +#endif
> +	}
>
>  	return 0;
>
> -- 
> 2.17.1

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox